1
- """
2
- ___________________________________________________________________________________________________________________________________________________
3
- | |
4
- | To use this script, please check the README.md file in the directory. A quick start to get the project running is described here. |
5
- | |
6
- | 1. Create a Groq account and get your API key at https://console.groq.com/login. |
7
- | |
8
- | 2. Either: |
9
- | - Add your API key directly to line 38: API_KEY = "your_groq_api_key_here", or |
10
- | - Create a .env file in the same directory, and add GROQ_API_KEY=your_groq_api_key_here. |
11
- | |
12
- | 3. Place all your PDFs in a folder named 'Source' in the same directory as this script. |
13
- | |
14
- | 4. Run the script: |
15
- | python quiz_generator.py |
16
- | |
17
- | The generated MCQ quiz will be saved in the 'Generated_Quizes' folder under a timestamped file name (generated_mcq_quiz_<date>_<time>.txt). |
18
- |_________________________________________________________________________________________________________________________________________________|
19
- """
20
-
21
-
22
- # Change this if you want to set the number of MCQ's
23
- num_questions = 5
24
-
25
-
26
1
import os
27
2
from PyPDF2 import PdfReader
28
3
from datetime import datetime
33
8
from langchain_huggingface import HuggingFaceEmbeddings
34
9
from langchain .text_splitter import CharacterTextSplitter
35
10
36
-
37
11
load_dotenv (find_dotenv ())
38
12
API_KEY = os .environ ["GROQ_API_KEY" ]
39
13
14
+ # Change this if you want to set the number of MCQs
15
+ num_questions = 5
16
+
40
17
41
18
def extract_text_from_pdfs ():
42
- print (f"Extracting text from PDF files in the folder: 'Source'..." )
19
+ """Extracts text from PDF files in the 'Source' folder."""
20
+ print ("Extracting text from PDF files in the folder: 'Source'..." )
43
21
all_text = []
44
-
45
- if not os . path . exists ( 'Source' ) or not os .listdir ('Source' ):
46
- print ("Folder ' Source' is empty or not found !" )
22
+
23
+ if len ( os .listdir ('Source' )) == 0 :
24
+ print ("Source Folder Empty !" )
47
25
print ("Process exiting..." )
48
26
exit (0 )
49
-
27
+
50
28
for file_name in os .listdir ('Source' ):
51
29
if file_name .endswith (".pdf" ):
52
30
file_path = os .path .join ('Source' , file_name )
@@ -57,8 +35,10 @@ def extract_text_from_pdfs():
57
35
print ("Text extraction completed." )
58
36
return " " .join (all_text )
59
37
38
+
60
39
def generate_unique_mcq (text , num_questions = 5 ):
61
- print (f"Splitting text into chunks and creating embeddings for LLM processing..." )
40
+ """Generates unique multiple choice questions from text."""
41
+ print ("LLM processing..." )
62
42
text_splitter = CharacterTextSplitter (
63
43
chunk_size = 1000 ,
64
44
chunk_overlap = 0
@@ -82,42 +62,48 @@ def generate_unique_mcq(text, num_questions=5):
82
62
)
83
63
84
64
quiz = []
85
# Build the prompt for the retrieval chain. Every fragment that contains a
# replacement field must be an f-string, and each fragment ends with a
# space so the implicitly concatenated pieces do not run together.
query = (
    f"Generate {num_questions} unique multiple choice questions "
    f"from the text: {text} "
    "Provide 4 answer options and also the correct answer in plaintext."
)
70
+
88
71
response = retrieval_chain .invoke (query )
89
72
question_and_options = response ['result' ]
90
73
quiz .append (question_and_options )
91
74
92
75
print ("MCQ generation completed." )
93
76
return quiz
94
77
78
+
95
79
def save_mcq_to_file(quiz, file_name="generated_mcq_quiz.txt"):
    """Save generated MCQs to a timestamped text file.

    The file is written inside the 'Generated_Quizes' folder, which is
    created (relative to the current working directory) if it is missing.

    Args:
        quiz: Iterable of question blocks. Each one is written under a
            "Question N" heading, numbered from 1.
        file_name: Kept for backward compatibility.
            NOTE(review): the value passed in is ignored; a timestamped
            name is always generated below. Confirm whether callers expect
            it to be honoured.

    Returns:
        None. Progress messages are printed to stdout.
    """
    output_folder = "Generated_Quizes"

    if not os.path.isdir(output_folder):
        # exist_ok covers the case where the folder appears between the
        # check above and this call.
        os.makedirs(output_folder, exist_ok=True)
        print(f"Folder '{output_folder} ' created.")

    # A second-resolution timestamp keeps successive runs from overwriting
    # each other's output.
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    file_name = f"generated_mcq_quiz_{current_time} .txt"
    file_path = os.path.join(output_folder, file_name)

    print(f"Saving the generated MCQs to file: '{file_path} '...")
    # Explicit UTF-8: model output may contain characters that the
    # platform's default encoding cannot represent.
    with open(file_path, "w", encoding="utf-8") as f:
        f.writelines(
            f"Question {i} :\n {question} \n \n "
            for i, question in enumerate(quiz, 1)
        )

    print(f"MCQ Quiz saved to {file_path} ")
112
97
98
+
113
99
if __name__ == "__main__":
    # Script entry point: check that the input folder is present, then run
    # extraction, question generation and saving in sequence.
    # isdir rather than exists, so that a regular file named 'Source' is
    # reported as missing instead of failing later when the folder is listed.
    if not os.path.isdir('Source'):
        print("Folder 'Source' not found.")
    else:
        print("Folder 'Source' found. Starting process...")
        text = extract_text_from_pdfs()
        print("Text extracted from PDFs.")

        mcq_quiz = generate_unique_mcq(text, num_questions=num_questions)
        save_mcq_to_file(mcq_quiz)
        print("Process completed successfully.")
0 commit comments