Skip to content

Commit 939faec

Browse files
fix(graphgen): generate qa from description
1 parent 0402647 commit 939faec

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

graphgen/operators/traverse_graph.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -327,25 +327,29 @@ async def _generate_question(
327327
):
328328
if len(node_or_edge) == 2:
329329
des = node_or_edge[0] + ": " + node_or_edge[1]['description']
330-
answer = node_or_edge[1]['description']
331330
loss = node_or_edge[1]['loss']
332331
else:
333332
des = node_or_edge[2]['description']
334-
answer = node_or_edge[2]['description']
335333
loss = node_or_edge[2]['loss']
336334

337335
async with semaphore:
338336
try:
339337
language = "Chinese" if detect_main_language(des) == "zh" else "English"
340-
question = await llm_client.generate_answer(
341-
QUESTION_GENERATION_PROMPT[language]['SINGLE_TEMPLATE'].format(
342-
answer=des
338+
339+
qa = await llm_client.generate_answer(
340+
QUESTION_GENERATION_PROMPT[language]['SINGLE_QA_TEMPLATE'].format(
341+
doc=des
343342
)
344343
)
345-
if question.startswith("Question:"):
346-
question = question[len("Question:"):].strip()
347-
elif question.startswith("问题:"):
348-
question = question[len("问题:"):].strip()
344+
345+
if "Question:" in qa and "Answer:" in qa:
346+
question = qa.split("Question:")[1].split("Answer:")[0].strip()
347+
answer = qa.split("Answer:")[1].strip()
348+
elif "问题:" in qa and "答案:" in qa:
349+
question = qa.split("问题:")[1].split("答案:")[0].strip()
350+
answer = qa.split("答案:")[1].strip()
351+
else:
352+
return {}
349353

350354
question = question.strip("\"")
351355
answer = answer.strip("\"")
@@ -370,9 +374,7 @@ async def _generate_question(
370374

371375
edges, nodes = await _pre_tokenize(graph_storage, tokenizer, edges, nodes)
372376

373-
# TODO: 需要把node的name也加进去,或者只用edge,两种都试一下
374377
tasks = []
375-
# des中可能会有SEP分割符
376378
for node in nodes:
377379
if "<SEP>" in node[1]['description']:
378380
description_list = node[1]['description'].split("<SEP>")

templates/question_generation.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# pylint: disable=C0301
2-
32
TEMPLATE_SINGLE_EN: str = """The answer to a question is provided. Please generate a question that corresponds to the answer.
43
54
################

0 commit comments

Comments
 (0)