Skip to content

Commit ddcb045

Browse files
committed
Created main application
1 parent 4d3ad1d commit ddcb045

File tree

1 file changed

+121
-0
lines changed

1 file changed

+121
-0
lines changed

llm_quiz_generator/main.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
"""
2+
___________________________________________________________________________________________________________________________________________________
3+
| |
4+
| To use this script, please check the README.md file in the directory. A quick start to get the project running is described here. |
5+
| |
6+
| 1. Create a Groq account and get your API key at https://console.groq.com/login. |
7+
| |
8+
| 2. Either: |
9+
| - Add your API key directly to line 38: API_KEY = "your_groq_api_key_here", or |
10+
| - Create a .env file in the same directory, and add GROQ_API_KEY=your_groq_api_key_here. |
11+
| |
12+
| 3. Place all your PDFs in a folder named 'Source' in the same directory as this script. |
13+
| |
14+
| 4. Run the script: |
15+
| python main.py |
16+
| |
17+
| The generated MCQ quiz will be saved as 'generated_mcq_quiz.txt' inside the 'Generated_Quizes' folder. |
18+
|_________________________________________________________________________________________________________________________________________________|
19+
"""
20+
21+
22+
# Change this to set the number of MCQs to generate.
23+
num_questions: int = 5  # passed to generate_unique_mcq() by the __main__ block
24+
25+
26+
27+
import os
28+
from PyPDF2 import PdfReader
29+
from langchain_groq import ChatGroq
30+
from langchain.chains import RetrievalQA
31+
from dotenv import load_dotenv, find_dotenv
32+
from langchain_community.vectorstores import FAISS
33+
from langchain_huggingface import HuggingFaceEmbeddings
34+
from langchain.text_splitter import CharacterTextSplitter
35+
36+
37+
# Load variables from the .env file located by find_dotenv() into the process
# environment, so GROQ_API_KEY may be defined there instead of in the shell.
load_dotenv(find_dotenv())
38+
# Read the Groq API key from the environment. This runs at import time and
# raises KeyError if GROQ_API_KEY is not set.
API_KEY = os.environ["GROQ_API_KEY"]
39+
40+
41+
def extract_text_from_pdfs(source_dir="Source"):
    """Extract and concatenate the text of every PDF file in a folder.

    Args:
        source_dir: Folder to scan for PDF files. Defaults to 'Source', the
            folder name this script uses.

    Returns:
        The text of all pages of all PDF files, joined with single spaces.
        An empty string when the folder contains no PDF files.

    Raises:
        OSError: Propagated from os.listdir when source_dir is missing or is
            not a directory.
    """
    print(f"Extracting text from PDF files in the folder: '{source_dir}'...")
    all_text = []
    # Sort the entries so the concatenation order is reproducible; os.listdir
    # returns names in arbitrary order.
    for file_name in sorted(os.listdir(source_dir)):
        # Compare case-insensitively so files such as 'REPORT.PDF' are not
        # silently skipped.
        if not file_name.lower().endswith(".pdf"):
            continue
        file_path = os.path.join(source_dir, file_name)
        print(f"Processing file: {file_name}")
        reader = PdfReader(file_path)
        for page in reader.pages:
            # Defensive: if extract_text() yields None for a page, substitute
            # an empty string so the join below cannot fail.
            all_text.append(page.extract_text() or "")
    print("Text extraction completed.")
    return " ".join(all_text)
53+
54+
55+
56+
def generate_unique_mcq(text, num_questions=5, model="llama-3.1-70b-versatile"):
    """Generate multiple-choice questions about ``text`` with a retrieval chain.

    The text is split into chunks, embedded into a FAISS vector store, and a
    RetrievalQA chain backed by a Groq-hosted chat model is asked to write the
    questions from the retrieved chunks.

    Args:
        text: Source material for the quiz.
        num_questions: Number of questions requested in the prompt.
        model: Groq model identifier passed to ChatGroq.
            NOTE(review): confirm the default is still served by Groq.

    Returns:
        A one-element list holding the model's full response (all questions,
        options and answers) as a single string.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    # Fail early with a clear message instead of an obscure error from the
    # vector-store build when there are no documents to index.
    if not text or not text.strip():
        raise ValueError("No text was provided to generate questions from.")

    print("Splitting text into chunks and creating embeddings for LLM processing...")
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = text_splitter.create_documents([text])

    embeddings = HuggingFaceEmbeddings()
    store = FAISS.from_documents(docs, embeddings)

    print(f"Connecting to LLM to generate {num_questions} unique MCQs...")
    llm = ChatGroq(temperature=0.2, model=model, api_key=API_KEY)

    retrieval_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )

    # The retriever supplies the relevant chunks as context, so the full text
    # must NOT be interpolated into the query: doing so embeds the whole
    # corpus as the search query and duplicates it inside the prompt, which
    # defeats retrieval and can exceed the model's context window.
    query = (
        f"Generate {num_questions} unique multiple choice questions from the provided context. "
        "Provide 4 answer options and also the correct answer in plaintext."
    )

    response = retrieval_chain.invoke(query)

    print("MCQ generation completed.")
    return [response["result"]]
90+
91+
92+
93+
def save_mcq_to_file(quiz, file_name="generated_mcq_quiz.txt", output_folder="Generated_Quizes"):
    """Write the generated quiz entries to a text file.

    Each entry of ``quiz`` is written as ``Question <i>:`` followed by the
    entry text and a blank line, numbered from 1.

    Args:
        quiz: Iterable of question strings to write.
        file_name: Name of the output file inside ``output_folder``.
        output_folder: Folder that receives the file; created if missing.
            Defaults to 'Generated_Quizes', the folder this script uses.

    Returns:
        None.
    """
    if not os.path.exists(output_folder):
        # exist_ok covers the folder being created by someone else between
        # the check above and this call.
        os.makedirs(output_folder, exist_ok=True)
        print(f"Folder '{output_folder}' created.")

    file_path = os.path.join(output_folder, file_name)

    print(f"Saving the generated MCQs to file: '{file_path}'...")
    # Write UTF-8 explicitly: model output can contain characters that the
    # platform's default encoding cannot represent.
    with open(file_path, "w", encoding="utf-8") as f:
        f.writelines(
            f"Question {i}:\n{question}\n\n" for i, question in enumerate(quiz, 1)
        )

    print(f"MCQ Quiz saved to {file_path}")
108+
109+
110+
111+
if __name__ == "__main__":
    # Script entry point: extract text from the PDFs in 'Source', generate
    # the quiz and save it to disk.
    # isdir (not exists) so a regular file named 'Source' is rejected here
    # rather than crashing later when the folder is listed.
    if not os.path.isdir('Source'):
        print("Folder 'Source' not found.")
    else:
        print("Folder 'Source' found. Starting process...")
        text = extract_text_from_pdfs()

        if not text.strip():
            # No PDFs, or none with extractable text: stop before building
            # embeddings and calling the LLM on empty input.
            print("No text could be extracted from the PDF files in 'Source'. Nothing to generate.")
        else:
            print("Text extracted from PDFs.")

            mcq_quiz = generate_unique_mcq(text, num_questions=num_questions)
            save_mcq_to_file(mcq_quiz)
            print("Process completed successfully.")

0 commit comments

Comments
 (0)