Skip to content

Commit e77065b

Browse files
Merge pull request #77 from open-sciencelab/fix/fix-logprobs
fix: fix loss_entropy
2 parents 6e4a142 + 2ff8f7a commit e77065b

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

graphgen/utils/calculate_confidence.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ def _normalize_yes_no(tokens: List[Token]) -> Dict[str, float]:
6161
{"yes": 0.8, "no": 0.2}
6262
Among them, "yes" and "yeah" are synonyms for "yes",
6363
while "no" and "nope" are synonyms for "no".
64-
If neither "yes" nor "no" synonyms are present, it returns:
65-
{"yes": 0.5, "no": 0.5}
64+
If no "yes" or "no" synonyms are present, it will be judged as uncertain.
65+
An uncertain result will also be considered as opposite to the ground truth.
6666
"""
6767
yes_syno = {
6868
# English yes synonyms
@@ -126,17 +126,23 @@ def _normalize_yes_no(tokens: List[Token]) -> Dict[str, float]:
126126

127127
yes_prob = 0.0
128128
no_prob = 0.0
129+
uncertain_prob = 0.0
129130
for tok in tokens:
130131
t = tok.text.lower().strip()
131132
if t in yes_syno:
132133
yes_prob += tok.prob
133134
elif t in no_syno:
134135
no_prob += tok.prob
136+
else:
137+
uncertain_prob += tok.prob
138+
139+
total = yes_prob + no_prob + uncertain_prob
135140

136-
total = yes_prob + no_prob
137-
if total == 0:
138-
return {"yes": 0.5, "no": 0.5}
139-
return {"yes": yes_prob / total, "no": no_prob / total}
141+
return {
142+
"yes": yes_prob / total,
143+
"no": no_prob / total,
144+
"uncertain": uncertain_prob / total,
145+
}
140146

141147

142148
def yes_no_loss_entropy(

0 commit comments

Comments (0)