|
#md# The positions of words can be initialized with pre-trained word vectors.
#md# ### Words
using WordCloud
# Extra stop words, merged below with the English stop-word list from WordCloud.
stwords = ["us", "will"];
# Read the sample speech shipped in the WordCloud package directory. The `do` form
# closes the file as soon as `processtext` returns, instead of leaking the handle.
words_weights = open(
    pkgdir(WordCloud) * "/res/Barack Obama's First Inaugural Address.txt"
) do io
    processtext(io, stopwords=WordCloud.stopwords_en ∪ stwords)
end
# The result is splatted into `zip`, i.e. it is treated as a (words, weights) pair,
# and turned into a word => weight dictionary.
words_weights = Dict(zip(words_weights...))
#md# ### Embeddings
using Embeddings
using TSne
# English GloVe embedding table loaded through `load_embeddings`.
const embtable = load_embeddings(GloVe{:en})
# Lookup table: vocabulary word => its 1-based position in `embtable.vocab`.
const get_word_index = Dict(zip(embtable.vocab, Iterators.countfrom(1)))
"""
    get_embedding(word)

Return the embedding of `word`: the column of `embtable.embeddings` whose index is
stored for `word` in `get_word_index`.

Throws a `KeyError` when `word` is not a key of `get_word_index`, so callers should
check membership first.
"""
get_embedding(word) = embtable.embeddings[:, get_word_index[word]]
# Map every word of the cloud to its embedding vector. Words without an embedding
# (neither as written nor lowercased) are removed from `words_weights`.
wordvec = Dict()
# Iterate over a snapshot of the keys: entries are deleted from `words_weights`
# inside the loop, and deleting from a Dict while iterating over that same Dict is
# not a documented-safe pattern.
for k in collect(keys(words_weights))
    if haskey(get_word_index, k)
        wordvec[k] = get_embedding(k)
    elseif haskey(get_word_index, lowercase(k))
        wordvec[k] = get_embedding(lowercase(k))
    else
        delete!(words_weights, k)
        println("remove ", k)
    end
end
# Stack the vectors as rows (one row per word, in the iteration order of `wordvec`)
# and project them with t-SNE; the second argument requests 2 output dimensions.
embedded = tsne(hcat(values(wordvec)...)', 2)
#md# ### WordCloud
# Random color scheme; the mask fill color is derived from it via `chooseabgcolor`.
sc = WordCloud.randomscheme()
wc = wordcloud(
    words_weights;
    # 1000×1000 ellipse on a fully transparent background.
    mask=shape(
        ellipse, 1000, 1000;
        backgroundcolor=(0, 0, 0, 0),
        color=WordCloud.chooseabgcolor(sc),
    ),
    colors=sc,
    # NOTE(review): presumably `initimages!` stops after preparing the word images so
    # that positions can be assigned manually below — confirm in the WordCloud docs.
    run=initimages!,
)

# Rescale the 2-D embedding into pixel coordinates of the mask.
pos = embedded
# Centroid of all points (a 1×2 row), subtracted so the cloud is centered on 0.
mean = sum(pos, dims=1) / size(pos, 1)
pos = pos .- mean
# Largest distance from the centroid. It has to be measured AFTER centering:
# taken on the raw coordinates, a point could end up farther than 0.5 from the
# origin once divided by 2r, i.e. outside the mask.
r = maximum(hypot.(pos[:, 1], pos[:, 2]))
# All points coincide (e.g. a single word): avoid 0/0 and keep them at the center.
r = iszero(r) ? one(r) : r
pos = pos ./ 2r
# NOTE(review): `size(wc.mask)` is in array-dimension order; if `setcenter!` expects
# (x, y) this is only correct for a square mask — confirm.
sz = collect(size(wc.mask))'
# Map [-0.5, 0.5] onto [0, sz] and round to whole pixels.
pos = round.(Int, pos .* sz .+ sz ./ 2)

# Center each word on its row of `pos`. The rows were built from `values(wordvec)`,
# so they line up with `keys(wordvec)` of the same, unmodified dictionary.
setpositions!(wc, collect(keys(wordvec)), eachrow(pos); type=setcenter!)
# NOTE(review): presumably marks placement as done so that `generate!` starts from
# the positions set above — confirm in the WordCloud docs.
setstate!(wc, :placement!)
generate!(wc; patient=-1)
println("results are saved to embedding.png")
paint(wc, "embedding.png")
# Leave the word cloud as the value of the script.
wc
#eval# runexample(:embedding)
#md# ![](embedding/embedding.png)  
0 commit comments