Skip to content

Commit cb97afb

Browse files
Ignored words work a little better.
1 parent 9c5b1fe commit cb97afb

File tree

4 files changed

+104
-26
lines changed

4 files changed

+104
-26
lines changed

static/js/pos-core-analyzer.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ const posAnalyzer = {
3232

3333
sentence.terms.forEach((term, index) => {
3434
const word = term.text.toLowerCase().trim();
35+
if (!word || ignoredWordsSet.has(word)) {
36+
return;
37+
}
3538
const tags = term.tags || [];
3639

3740
if (!word || ignoredWordsSet.has(word)) return;

static/js/track-initializer.js

Lines changed: 80 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,54 @@ const trackInitializer = {
33
if (!lyricsElement) return;
44

55
const track = document.querySelector(`[id^="trackDetails${trackIndex}"]`);
6-
const ignoredWordsStr = track?.dataset.ignoredWords || '';
7-
const ignoredWords = ignoredWordsStr.split(',').map(w => w.trim().toLowerCase()).filter(Boolean);
8-
const ignoredWordsSet = new Set(ignoredWords);
9-
10-
const words = lyricsElement.innerHTML
11-
.split(/\n/)
12-
.map(line => line.trim()
13-
.split(/(\s+)/)
14-
.map(word => {
15-
const cleanedWord = cleanWord(word);
16-
if (cleanedWord.length > 0 && ignoredWordsSet.has(cleanedWord)) {
17-
return word;
18-
}
19-
if (cleanedWord.length > 0) {
20-
return this.createWordSpan(word, cleanedWord, trackIndex);
21-
}
22-
return word;
23-
})
24-
.join('')
25-
)
26-
.join('\n');
27-
28-
lyricsElement.innerHTML = words;
6+
7+
const ignoredWords = (track?.dataset.ignoredWords || '').split(',').map(w => w.trim()).filter(Boolean);
8+
const patterns = new Set(ignoredWords.filter(w => /[()[\]{}:]/.test(w)));
9+
const exactWords = new Set(ignoredWords.filter(w => !/[()[\]{}:]/.test(w)));
10+
11+
const expandedIgnoredSet = new Set(exactWords);
12+
exactWords.forEach(word => {
13+
const cleaned = cleanWord(word);
14+
if (cleaned) expandedIgnoredSet.add(cleaned);
15+
});
16+
17+
const lines = lyricsElement.innerHTML.split(/\n/);
18+
const processedLines = lines.map(line => {
19+
let result = line;
20+
21+
patterns.forEach(ignored => {
22+
const escapedIgnored = ignored.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
23+
const regex = new RegExp(`(${escapedIgnored})`, 'g');
24+
result = result.replace(regex, (match) => match);
25+
});
26+
27+
const words = result.split(/(\s+)/);
28+
return words.map(segment => {
29+
if (!segment.trim()) return segment;
30+
31+
if (patterns.has(segment)) return segment;
32+
33+
const cleanedWord = cleanWord(segment);
34+
if (!cleanedWord) return segment;
35+
36+
if (expandedIgnoredSet.has(cleanedWord)) return segment;
37+
38+
return this.createWordSpan(segment, cleanedWord, trackIndex);
39+
}).join('');
40+
});
41+
42+
lyricsElement.innerHTML = processedLines.join('\n');
43+
44+
const wordCountContainer = track.querySelector('.word-counts');
45+
if (wordCountContainer) {
46+
const wordCountElements = wordCountContainer.querySelectorAll('[id^="wordCount"]');
47+
wordCountElements.forEach(element => {
48+
const word = element.getAttribute('data-word');
49+
if (expandedIgnoredSet.has(word)) {
50+
element.remove();
51+
}
52+
});
53+
}
2954
},
3055

3156
createWordSpan(originalWord, cleanedWord, trackIndex) {
@@ -158,4 +183,35 @@ function copyDebugInfo(trackIndex) {
158183
});
159184
}
160185
window.copyDebugInfo = copyDebugInfo;
161-
window.trackInitializer = trackInitializer;
186+
window.trackInitializer = trackInitializer;
187+
188+
/**
 * Copies a JSON snapshot of a track's ignored-words state to the clipboard.
 *
 * The snapshot contains the raw `data-ignored-words` string of the track
 * element, that string split into a trimmed list, the lyrics element's HTML,
 * the `data-word` value of every `.word` element for the track, and the
 * non-empty direct text nodes of the lyrics element.
 *
 * @param {number|string} trackIndex - Index used to build the element ids and
 *   selectors (`trackDetails<index>`, `lyrics<index>`).
 * @returns {Promise<void>} Settles once the clipboard write has finished;
 *   failures are logged and never rejected to the caller, so the function is
 *   safe to use from an inline `onclick` handler.
 */
function copyIgnoredWordsInfo(trackIndex) {
    const trackElement = document.querySelector(`[id^="trackDetails${trackIndex}"]`);
    const ignoredWordsStr = trackElement?.dataset.ignoredWords || '';
    // Look the lyrics element up once; it may be absent, and a debug helper
    // should still report whatever it can instead of throwing.
    const lyricsElement = document.getElementById(`lyrics${trackIndex}`);

    const info = {
        trackIndex,
        ignoredWords: ignoredWordsStr,
        ignoredWordsList: ignoredWordsStr.split(',').map((w) => w.trim()).filter(Boolean),
        // null (rather than a crash) signals that the lyrics element was not found.
        trackHTML: lyricsElement ? lyricsElement.innerHTML : null,
        wordElements: Array.from(
            document.querySelectorAll(`.word[data-track="${trackIndex}"]`),
            (el) => ({
                word: el.getAttribute('data-word'),
                isInteractive: true,
            }),
        ),
        // nodeType 3 === Node.TEXT_NODE: text sitting directly in the lyrics
        // element, i.e. not wrapped in a child element.
        textNodes: Array.from(lyricsElement?.childNodes ?? [])
            .filter((node) => node.nodeType === 3)
            .map((node) => node.textContent.trim())
            .filter((text) => text.length > 0),
    };

    // The Clipboard API is unavailable in insecure contexts.
    if (!navigator.clipboard?.writeText) {
        console.error('Failed to copy ignored words debug info: Clipboard API unavailable');
        return Promise.resolve();
    }

    return navigator.clipboard.writeText(JSON.stringify(info, null, 2))
        .then(() => {
            const button = document.querySelector(`button[onclick="copyIgnoredWordsInfo(${trackIndex})"]`);
            // The function can be invoked without its button being rendered.
            if (!button) return;
            button.textContent = 'Copied!';
            setTimeout(() => {
                button.textContent = 'Debug Ignored Words';
            }, 2000);
        })
        .catch((err) => {
            console.error('Failed to copy ignored words debug info:', err);
        });
}
217+
window.copyIgnoredWordsInfo = copyIgnoredWordsInfo;

templates/frontend/pages/album-details.html

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ <h3 class="text-lg font-bold">
161161
>
162162
Copy Debug Info
163163
</button>
164+
<button onclick="copyIgnoredWordsInfo({{ $trackIndex }})" class="px-2 py-1 text-xs bg-purple-700 text-white rounded hover:bg-purple-600 transition-colors">
165+
Debug Ignored Words
166+
</button>
164167
{{ end }}
165168
</div>
166169

words/words.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package words
22

33
import (
44
"millions-of-words/models"
5+
"regexp"
56
"sort"
67
"strings"
78
"unicode"
@@ -21,21 +22,36 @@ func CalculateAndSortWordFrequencies(lyrics string, ignoredWords string) ([]mode
2122
}
2223

2324
ignoredWordsMap := make(map[string]bool)
25+
var ignoredPatterns []string
2426
if ignoredWords != "" {
2527
for _, word := range strings.Split(ignoredWords, ",") {
2628
word = strings.TrimSpace(word)
2729
if word != "" {
28-
ignoredWordsMap[word] = true
30+
if strings.ContainsAny(word, "()[]{}:") {
31+
ignoredPatterns = append(ignoredPatterns, word)
32+
} else {
33+
ignoredWordsMap[word] = true
34+
cleaned := CleanWord(word)
35+
if cleaned != "" {
36+
ignoredWordsMap[cleaned] = true
37+
}
38+
}
2939
}
3040
}
3141
}
3242

43+
processedLyrics := lyrics
44+
for _, pattern := range ignoredPatterns {
45+
escapedPattern := regexp.QuoteMeta(pattern)
46+
processedLyrics = regexp.MustCompile(escapedPattern).ReplaceAllString(processedLyrics, "")
47+
}
48+
3349
wordCounts := make(map[string]int)
3450
vowelCount := 0
3551
consonantCount := 0
3652
wordLengthDistribution := make(map[int]int)
3753

38-
words := splitLyricsIntoWords(removeItalics(lyrics))
54+
words := splitLyricsIntoWords(removeItalics(processedLyrics))
3955

4056
for _, word := range words {
4157
cleanedWord := CleanWord(word)

0 commit comments

Comments
 (0)