Skip to content

Commit bddd690

Browse files
committed
email info with unigram and bigram percentage
1 parent 967c5c2 commit bddd690

File tree

5 files changed

+102
-33
lines changed

5 files changed

+102
-33
lines changed

src/_locales/de/messages.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@
126126
"message": "Wahrscheinlichkeit"
127127
},
128128
"emailinfo_label_known_tokens": {
129-
"message": "Bekannte Tokens"
129+
"message": "Bekannte Token (Uni-/Bigrams)"
130130
},
131131
"emailinfo_top_tokens": {
132132
"message": "Top Tokens für Schlagwort \"$1\""

src/_locales/en/messages.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@
126126
"message": "Probability"
127127
},
128128
"emailinfo_label_known_tokens": {
129-
"message": "Known Tokens"
129+
"message": "Known Token (Uni-/Bigrams)"
130130
},
131131
"emailinfo_top_tokens": {
132132
"message": "Top Tokens for Tag \"$1\""

src/background.js

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,10 @@ function onNewMailReceived(folder, messages) {
833833
}
834834

835835

836+
/**
837+
* Displays Bayes information for a specific email message.
838+
* @param {string} messageId - The ID of the email message.
839+
*/
836840
function showEMailInfo(messageId) {
837841
console.log(`Displaying Bayes info for message ID: ${messageId}`);
838842

@@ -854,26 +858,29 @@ function showEMailInfo(messageId) {
854858
const tokenContributions = probabilityData.tokenContributions;
855859

856860
// Berechnung des bekannten Token-Prozentsatzes
857-
let knownPercentage = 0;
861+
let knownUnigramsPercentage = 0;
862+
let knownBigramsPercentage = 0;
858863
if (bayesData[tagName].tokenList) {
859-
const knownTokenData = getKnownTokenPercentage(tokens, bayesData[tagName].tokenList);
860-
knownPercentage = knownTokenData.knownPercentage;
864+
// Verwende die neue Funktion calculateKnownTokenTypesPercentage
865+
const knownTokenData = calculateKnownTokenTypesPercentage(tokens, bayesData[tagName].tokenList);
866+
knownUnigramsPercentage = knownTokenData.knownUnigramsPercentage;
867+
knownBigramsPercentage = knownTokenData.knownBigramsPercentage;
861868
}
862869

863870
probabilities.push({
864871
tag: tagName,
865872
tagKey: tagKey,
866873
probability: (probability * 100).toFixed(2),
867874
tokenContributions: tokenContributions,
868-
knownTokenPercentage: knownPercentage.toFixed(2)
875+
knownUnigramsPercentage: knownUnigramsPercentage.toFixed(2),
876+
knownBigramsPercentage: knownBigramsPercentage.toFixed(2)
869877
});
870878

871879
console.log(
872880
`Probability for ${tagName}: ${(probability * 100).toFixed(2)}%`
873881
);
874-
console.log(
875-
`Known Token Percentage for ${tagName}: ${knownPercentage.toFixed(2)}%`
876-
);
882+
console.log(`Known Unigrams/Bigrams for ${tagName}: ${knownUnigramsPercentage.toFixed(2)}% / ${knownBigramsPercentage.toFixed(2)}%`);
883+
877884
}
878885
});
879886

@@ -901,6 +908,7 @@ function showEMailInfo(messageId) {
901908
});
902909
}
903910

911+
904912
messenger.runtime.onMessage.addListener((message, sender, sendResponse) => {
905913
if (message.action === "refreshBayesData") {
906914
messenger.storage.local

src/email_info.js

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,16 @@ document.addEventListener("DOMContentLoaded", async () => {
7979
}
8080
});
8181

82+
8283
function displayEmailInfo() {
8384
const tableBody = document.getElementById("probabilities-table");
8485
const tokenTablesContainer = document.getElementById("token-tables-container");
8586
const toggleButton = document.getElementById("toggle-tokens-button");
86-
tokenTablesContainer.style.display = 'none'; // Token-Tabellen anfangs ausblenden
87+
tokenTablesContainer.style.display = 'none'; // Initially hide token tables
88+
8789
let tagKeyToNameMap = {};
8890
let tagNameToKeyMap = {};
89-
90-
// Setze HTML-Übersetzungen mit der trans-Funktion
91+
// Set translated texts using the trans function
9192
document.getElementById('email-info-title').textContent = trans("emailinfo_title");
9293
document.getElementById('emailinfo_label_tag').textContent = trans("emailinfo_label_tag");
9394
document.getElementById('emailinfo_label_probability').textContent = trans("emailinfo_label_probability");
@@ -100,7 +101,7 @@ function displayEmailInfo() {
100101
const probabilities = result.bayesInfoData || [];
101102
const bayesData = result.bayesData || {};
102103

103-
// Mapping von Tag-Key zu Tag-Name und umgekehrt erstellen
104+
// Create mappings from Tag-Key to Tag-Name and vice versa
104105
tags.forEach((tag) => {
105106
tagKeyToNameMap[tag.key] = tag.tag;
106107
tagNameToKeyMap[tag.tag] = tag.key;
@@ -109,56 +110,57 @@ function displayEmailInfo() {
109110
probabilities.forEach((item) => {
110111
const row = document.createElement("tr");
111112

113+
// Tag Cell
112114
const tagCell = document.createElement("td");
113115
tagCell.textContent = item.tag;
114116
row.appendChild(tagCell);
115117

118+
// Probability Cell
116119
const probCell = document.createElement("td");
117-
118120
if (!bayesData[item.tag]) {
119121
probCell.textContent = "50%";
120122
} else if (!bayesData[item.tag].trainingCount) {
121123
probCell.textContent = "50%";
122124
} else {
123125
probCell.textContent = item.probability + "%";
124126
}
125-
126127
row.appendChild(probCell);
127128

128-
// Hinzufügen der Zelle für bekannte Tokens
129+
// Known Tokens Percentage Cell (Unigrams / Bigrams)
129130
const knownTokensCell = document.createElement("td");
130-
if (item.knownTokenPercentage !== undefined) {
131-
knownTokensCell.textContent = item.knownTokenPercentage + "%";
131+
if (item.knownUnigramsPercentage !== undefined && item.knownBigramsPercentage !== undefined) {
132+
knownTokensCell.textContent = `${item.knownUnigramsPercentage}% / ${item.knownBigramsPercentage}%`;
132133
} else {
133-
knownTokensCell.textContent = "0.00%"; // Fallback, falls nicht definiert
134+
knownTokensCell.textContent = "0.00% / 0.00%"; // Fallback if percentages are not defined
134135
}
135136
row.appendChild(knownTokensCell);
136137

137138
tableBody.appendChild(row);
138139

139-
// Verarbeitung der TokenContributions, um Top 5 positive und negative Tokens zu finden
140+
// Process Token Contributions to find Top 5 Positive and Negative Tokens
140141
const tokenContributions = item.tokenContributions || [];
141142

142-
// Filtere die Tokens, die in der E-Mail vorhanden sind
143+
// Filter tokens present in the email
143144
const tokensInEmail = tokenContributions.filter(tc => tc.isPresent);
144145

145-
// Top 5 positive Tokens (höchste positive Beiträge)
146+
// Top 10 Positive Tokens (highest positive contributions)
146147
const topPositiveTokens = tokensInEmail
147148
.filter(tc => tc.contribution > 0)
148149
.sort((a, b) => b.contribution - a.contribution)
149-
.slice(0, 5);
150+
.slice(0, 10);
150151

151-
// Top 5 negative Tokens (niedrigste negative Beiträge)
152+
// Top 10 Negative Tokens (lowest negative contributions)
152153
const topNegativeTokens = tokensInEmail
153154
.filter(tc => tc.contribution < 0)
154155
.sort((a, b) => a.contribution - b.contribution)
155-
.slice(0, 5);
156+
.slice(0, 10);
156157

157-
// Erstelle eine Tabelle für das aktuelle Schlagwort
158+
// Create a table for the current tag
158159
const tokenTable = document.createElement("table");
159160
tokenTable.style.marginTop = "15px";
160161
tokenTable.style.width = "100%";
161162

163+
// Table Header
162164
const tokenTableHeader = document.createElement("thead");
163165
const tokenTableHeaderRow = document.createElement("tr");
164166
const tokenTableHeaderCell = document.createElement("th");
@@ -169,6 +171,7 @@ function displayEmailInfo() {
169171
tokenTableHeader.appendChild(tokenTableHeaderRow);
170172
tokenTable.appendChild(tokenTableHeader);
171173

174+
// Table Subheader
172175
const tokenTableSubHeader = document.createElement("tr");
173176
const tokenSubHeaderToken = document.createElement("th");
174177
tokenSubHeaderToken.textContent = trans("emailinfo_token");
@@ -185,9 +188,10 @@ function displayEmailInfo() {
185188

186189
const tokenTableBody = document.createElement("tbody");
187190

188-
// Füge die positiven Tokens hinzu
191+
// Add Positive Tokens
189192
topPositiveTokens.forEach(tc => {
190193
const tr = document.createElement("tr");
194+
191195
const tdToken = document.createElement("td");
192196
tdToken.textContent = tc.token;
193197
tdToken.classList.add("positive-token");
@@ -197,15 +201,17 @@ function displayEmailInfo() {
197201

198202
const tdType = document.createElement("td");
199203
tdType.textContent = trans("emailinfo_positive");
204+
200205
tr.appendChild(tdToken);
201206
tr.appendChild(tdContribution);
202207
tr.appendChild(tdType);
203208
tokenTableBody.appendChild(tr);
204209
});
205210

206-
// Füge die negativen Tokens hinzu
211+
// Add Negative Tokens
207212
topNegativeTokens.forEach(tc => {
208213
const tr = document.createElement("tr");
214+
209215
const tdToken = document.createElement("td");
210216
tdToken.textContent = tc.token;
211217
tdToken.classList.add("negative-token");
@@ -215,6 +221,7 @@ function displayEmailInfo() {
215221

216222
const tdType = document.createElement("td");
217223
tdType.textContent = trans("emailinfo_negative");
224+
218225
tr.appendChild(tdToken);
219226
tr.appendChild(tdContribution);
220227
tr.appendChild(tdType);
@@ -225,31 +232,32 @@ function displayEmailInfo() {
225232
tokenTablesContainer.appendChild(tokenTable);
226233
});
227234

228-
// Entferne bayesInfoData nach der Anzeige
235+
// Remove bayesInfoData after display
229236
messenger.storage.local.remove(["bayesInfoData"]);
230237

231-
// Fenstergröße nach dem Laden des Inhalts anpassen
238+
// Adjust window size after loading content
232239
adjustWindowSize();
233240
}).catch((error) => {
234241
console.error("Error loading Bayes info data:", error);
235242
});
236243

237-
// Event Listener für den Toggle-Button hinzufügen
244+
// Add Event Listener to the Toggle Button
238245
if (toggleButton) {
239246
toggleButton.addEventListener('click', () => {
240247
if (tokenTablesContainer.style.display === 'none' || tokenTablesContainer.style.display === '') {
241248
tokenTablesContainer.style.display = 'block';
242-
toggleButton.innerHTML = '&#9650;'; // Pfeil nach oben
249+
toggleButton.innerHTML = '&#9650;'; // Up arrow
243250
} else {
244251
tokenTablesContainer.style.display = 'none';
245-
toggleButton.innerHTML = '&#9660;'; // Pfeil nach unten
252+
toggleButton.innerHTML = '&#9660;'; // Down arrow
246253
}
247254

248255
adjustWindowSize();
249256
});
250257
}
251258
}
252259

260+
253261
function adjustWindowSize() {
254262
// Warten, bis der Inhalt gerendert wurde
255263
setTimeout(() => {

src/utils.js

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,56 @@ function getKnownTokenPercentage(tokens, tokenList) {
6262

6363
return { knownPercentage };
6464
}
65+
66+
67+
/**
 * Calculates which share of an email's distinct unigrams and bigrams is
 * already present in the token database of one tag.
 *
 * Duplicate tokens are counted once. A token containing an underscore "_"
 * is classified as a bigram; every other token is classified as a unigram.
 *
 * A token counts as "known" only if `tokenList` has it as an OWN property
 * with a truthy value. Inherited members of Object.prototype (for example
 * "constructor" or "__proto__") are therefore never reported as known, even
 * though a plain `tokenList[token]` lookup would find them.
 *
 * @param {Array} tokens - List of tokens in the email (duplicates allowed).
 * @param {Object} tokenList - The existing token database for a specific tag,
 *   keyed by token. A missing (null/undefined) list is treated as empty.
 * @returns {Object} An object with the numeric properties
 *   `knownUnigramsPercentage` and `knownBigramsPercentage` (0 when the email
 *   contains no token of the respective type).
 */
function calculateKnownTokenTypesPercentage(tokens, tokenList) {
  // Remove duplicates from the tokens
  const uniqueTokens = new Set(tokens);

  // Treat a missing database as empty instead of throwing on lookup
  const knownTokens = tokenList || {};

  // Own-property check so tokens that collide with Object.prototype members
  // are not mistaken for trained tokens; the truthiness test on the stored
  // value is kept from the original lookup.
  const isKnown = (token) =>
    Object.prototype.hasOwnProperty.call(knownTokens, token) &&
    Boolean(knownTokens[token]);

  // Initialize counters
  let knownUnigrams = 0;
  let totalUnigrams = 0;
  let knownBigrams = 0;
  let totalBigrams = 0;

  // Iterate through each unique token once
  uniqueTokens.forEach((token) => {
    if (token.includes('_')) {
      // It's a bigram
      totalBigrams++;
      if (isKnown(token)) {
        knownBigrams++;
      }
    } else {
      // It's a unigram
      totalUnigrams++;
      if (isKnown(token)) {
        knownUnigrams++;
      }
    }
  });

  // Calculate percentages; guard against division by zero for empty groups
  const knownUnigramsPercentage = totalUnigrams > 0 ? (knownUnigrams / totalUnigrams) * 100 : 0;
  const knownBigramsPercentage = totalBigrams > 0 ? (knownBigrams / totalBigrams) * 100 : 0;

  // Log the results for debugging
  console.log(`Known unigrams: ${knownUnigramsPercentage.toFixed(2)}% (Known: ${knownUnigrams}, Total: ${totalUnigrams})`);
  console.log(`Known bigrams: ${knownBigramsPercentage.toFixed(2)}% (Known: ${knownBigrams}, Total: ${totalBigrams})`);

  // Return the percentages
  return {
    knownUnigramsPercentage,
    knownBigramsPercentage
  };
}

0 commit comments

Comments
 (0)