-
Notifications
You must be signed in to change notification settings - Fork 26
Get reformulator working #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: public
Are you sure you want to change the base?
Changes from 2 commits
f7a97ce
a3fdc97
212b88c
c364342
0c0aec0
cf2fca5
811d4e0
f34de04
25813f6
f907ea1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -269,12 +269,12 @@ def reformulate(self): | |
if self.exclude_version: | ||
query += f" -AnkiReformulator:\"*version*=*'{self.VERSION}'*\"" | ||
|
||
# load db just in case | ||
# load db just in case, and create one if it doesn't already exist | ||
self.db_content = self.load_db() | ||
if not self.db_content: | ||
red("Empty database. If you have already ran anki_reformulator " | ||
"before then something went wrong!") | ||
whi("Trying to create a new database") | ||
whi("Creating a empty database") | ||
self.save_to_db({}) | ||
self.db_content = self.load_db() | ||
assert self.db_content, "Could not create database" | ||
|
@@ -507,7 +507,7 @@ def reformulate_note(self, nid: int, note: pd.Series) -> Dict: | |
# reformulate the content | ||
content = note["fields"][self.field_name]["value"] | ||
log["note_field_content"] = content | ||
formattedcontent = self.cloze_input_parser(content) if iscloze(content) else content | ||
formattedcontent = self.cloze_input_parser(content) | ||
log["note_field_formattedcontent"] = formattedcontent | ||
|
||
# if the card is in the dataset, just take the dataset value directly | ||
|
@@ -537,11 +537,13 @@ def reformulate_note(self, nid: int, note: pd.Series) -> Dict: | |
fc, media = replace_media( | ||
content=formattedcontent, | ||
media=None, | ||
mode="remove_media", | ||
) | ||
mode="remove_media") | ||
log["media"] = media | ||
|
||
if not skip_llm: | ||
if skip_llm: | ||
log["llm_answer"] = {"Skipped": True} | ||
log["dollar_price"] = 0 | ||
else: | ||
dataset = copy.deepcopy(self.dataset) | ||
curr_mess = [{"role": "user", "content": fc}] | ||
dataset = semantic_prompt_filtering( | ||
|
@@ -553,8 +555,7 @@ def reformulate_note(self, nid: int, note: pd.Series) -> Dict: | |
embedding_model=self.embedding_model, | ||
whi=whi, | ||
yel=yel, | ||
red=red, | ||
) | ||
red=red) | ||
dataset += curr_mess | ||
|
||
assert dataset[0]["role"] == "system", "First message is not from system!" | ||
|
@@ -597,20 +598,14 @@ def reformulate_note(self, nid: int, note: pd.Series) -> Dict: | |
) | ||
else: | ||
log["dollar_price"] = "?" | ||
else: | ||
log["llm_answer"] = {"Skipped": True} | ||
log["dollar_price"] = 0 | ||
|
||
log["note_field_newcontent"] = newcontent | ||
formattednewcontent = self.cloze_output_parser(newcontent) if iscloze(newcontent) else newcontent | ||
formattednewcontent = self.cloze_output_parser(newcontent) | ||
log["note_field_formattednewcontent"] = formattednewcontent | ||
log["status"] = STAT_OK_REFORM | ||
|
||
if iscloze(content) and iscloze( newcontent + formattednewcontent): | ||
# check that no cloze were lost | ||
# TODO: Bug here: `iscloze` can return true if the new content is a | ||
# close, but if the original content is not a cloze, then this | ||
# fails | ||
for cl in getclozes(content): | ||
cl = cl.split("::")[0] + "::" | ||
assert cl.startswith("{{c") and cl in content | ||
|
@@ -734,18 +729,14 @@ def reset_note(self, nid: int, note: pd.Series) -> Dict: | |
] | ||
|
||
if not entries: | ||
red( | ||
f"Entry not found for note {nid}. Looking for the content of " | ||
"the field AnkiReformulator" | ||
) | ||
red(f"Entry not found for note {nid}. Looking for the content of " | ||
"the field AnkiReformulator") | ||
logfield = note["fields"]["AnkiReformulator"]["value"] | ||
logfield = logfield.split( | ||
"<!--SEPARATOR-->")[0] # keep most recent | ||
if not logfield.strip(): | ||
raise Exception( | ||
f"Note {nid} was not found in the db and its " | ||
"AnkiReformulator field was empty." | ||
) | ||
raise Exception(f"Note {nid} was not found in the db and its " | ||
"AnkiReformulator field was empty.") | ||
|
||
# replace the [[c1::cloze]] by {{c1::cloze}} | ||
logfield = logfield.replace("]]", "}}") | ||
|
@@ -755,7 +746,7 @@ def reset_note(self, nid: int, note: pd.Series) -> Dict: | |
|
||
# parse old content | ||
buffer = [] | ||
for i, line in enumerate(logfield.split("<br>")): | ||
for line in logfield.split("<br>"): | ||
if buffer: | ||
try: | ||
_ = rtoml.loads("".join(buffer + [line])) | ||
|
@@ -774,10 +765,12 @@ def reset_note(self, nid: int, note: pd.Series) -> Dict: | |
|
||
# parse new content at the time | ||
buffer = [] | ||
for i, line in enumerate(logfield.split("<br>")): | ||
for line in logfield.split("<br>"): | ||
if buffer: | ||
try: | ||
_ = rtoml.loads("".join(buffer + [line])) | ||
# TODO: What are you trying to do here? Just check that adding the line keeps valid toml? | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think so. IIRC the thing with toml was to have a human-readable way to see what happened using the addon. As a lot of log is packed into it, I also added code to try rolling back if --reset was used and in case the db failed to recover. I can be fine with only storing data to the db, but also with storing all metadata of all scripts into a single field. Btw rtoml was better in some aspects than toml, but I remember having quite a bit of trouble in some situations (like dumping then loading resulting in different values, especially when None values are involved), but I can't remember more specifically. |
||
# If so, you should catch the specific exception that the load function raises on error | ||
rtoml.loads("".join(buffer + [line])) | ||
buffer.append(line) | ||
continue | ||
except Exception: | ||
|
@@ -931,10 +924,8 @@ def apply_reset(self, log: Dict) -> None: | |
|
||
# remove TO_RESET tag if present | ||
removetags(nid, "AnkiReformulator::TO_RESET") | ||
|
||
# remove Done tag | ||
removetags(nid, "AnkiReformulator::Done") | ||
|
||
# remove DOING tag | ||
removetags(nid, "AnkiReformulator::RESETTING") | ||
|
||
|
@@ -987,11 +978,8 @@ def load_db(self) -> Dict: | |
cursor = conn.cursor() | ||
cursor.execute("SELECT data FROM dictionaries") | ||
rows = cursor.fetchall() | ||
dictionaries = [] | ||
for row in rows: | ||
dictionary = json.loads(zlib.decompress(row[0])) | ||
dictionaries.append(dictionary) | ||
return dictionaries | ||
# TODO: Why do you compress? This just makes it more difficult to debug | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Initially I just dumped json, but the size got out of hand surprisingly quickly, so I compressed it with zlib and found out I might as well use sqlite. This was totally amateurish; if I had to do it again I would use sqlite only and enable the built-in compression, of course. But still being technically an amateur, I'm open to any suggestion of course |
||
return [json.loads(zlib.decompress(row[0])) for row in rows] | ||
|
||
|
||
if __name__ == "__main__": | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo, should be "an"