Replies: 1 comment 2 replies
-
These models are not guaranteed to give perfectly structured json output. You should look into grammar constraints, the guidance library from Microsoft, or use a larger or paid model that has better instruct adherence. Otherwise, it's often just a matter of trying a few times. If the json output is not correct, you can try again. |
Beta Was this translation helpful? Give feedback.
2 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
below code was working fine and give results
from llama_cpp import Llama
import timeit
from PyPDF2 import PdfReader
cc_path1 = r'data\contract_note\15.11.2023 S_E_CONTRACT_8560017_20231115.PDF'
cc_path2 = r"D:\llama_cpp\data\sample_cc_zerodha_updated.pdf"
reader = PdfReader(cc_path1)
number_of_pages = len(reader.pages)
print(number_of_pages)
page = reader.pages[0]
text = page.extract_text()
import pdfplumber
print(text)
text1 = text.splitlines()
new_text1 =" ".join(text1)
print(new_text1)
start = timeit.default_timer()
prompt1 = f'''Extract the Broker name, Client ID, Contract Note No., Trade Date, Exchange/Clg. Corp, Order No., Order Time, Trade No, Trade Time, Security/Contract Description, ISIN CODE, Buy/Sell, Quantity, Gross Rate/Trade Price per unit (Rs) @, SEBI Turnover Fee, Brokerage per Unit (Rs), Closing Rate per Unit (Only for Derivatives) (Rs), Net Total (Before Levies) (Rs), Payin/PayOut Obligation, Minimum Charges, IGST, CGST, SGST, STT, SEBI T/O Fees, Exchange Clearing Chrgs, Other Charges Cash 2, Demat Charges , Stamp Duty, Other Charges, IPF, Total Net and Remark from following text and give output in json.
Desired format:
Data: {{"Data":["Broker name":null,"Client ID":null,"Contract Note No.":null,"Trade Date":null,"Exchange/Clg. Corp":null, "Order No.":null,"Order Time":null,"Trade No":null,"Trade Time":null,"Security/Contract Description":null,"ISIN CODE":null,"Buy/Sell":null,"Quantity":null,"Security/Contract Description":null,"Gross Rate/Trade Price per unit (Rs) @":null,"SEBI Turnover Fee":null,"Brokerage per Unit (Rs)":null,"Net Total (Before Levies) (Rs)":null, "Payin/PayOut Obligation":null, "Minimum Charges":null, "Exchange Tr. Chrg":null, "IGST":null, "CGST":null,"SGST":null, "STT":null, "SEBI T/O Fees":null, "Exchange Clearing Chrgs":null, "Other Charges Cash 2":null, "Demat Charges":null, "Stamp Duty":null,"Clearing Charges": null, "IPF": null, "Total Net": null, "Remark": null]}}
Input: {new_text1}'''
llm = Llama( model_path=r"models\Meta-Llama-3-8B-Instruct.Q5_K_S.gguf",n_ctx=4096,chat_format="chatml")
x = llm.create_chat_completion(
messages=[
{
"role": "system",
"content": "You are a helpful assistant that outputs in JSON.",
},
{"role": "user", "content": prompt1},
],
response_format={
"type": "json_object",
)
print(x['choices'][0]['message']['content'])
end = timeit.default_timer()
print(end-start)
pdf that i was provided contains tabular data and the prompt i want to extract these values fill the below json
{
"Data": [
{
"Broker name": null,
"Client ID": null,
"Contract Note No.": null,
"Trade Date": null,
"Exchange/Clg. Corp": null,
"Order No.": null,
"Order Time": null,
"Trade No.": null,
"Trade Time": null,
"Security/Contract Description": null,
"ISIN CODE": null,
"Buy/Sell": null,
"Quantity": null,
"Gross Rate/Trade Price per unit (Rs) @": null,
"SEBI Turnover Fee": null,
"Brokerage per Unit (Rs)": null,
"Closing Rate per Unit (Only for Derivatives) (Rs)": null,
"Net Total (Before Levies) (Rs)": null,
"Payin/PayOut Obligation": null,
"Minimum Charges": null,
"Exchange Tr. Chrg": null,
"IGST": null,
"CGST": null,
"SGST": null,
"STT": null,
"SEBI T/O Fees": null,
"Exchange Clearing Chrgs": null,
"Other Charges Cash 2": null,
"Demat Charges": null,
"Stamp Duty": null,
"Other Charges": null,
"Clearing Charges": null,
"IPF": null,
"Total Net": null,
"Remark": null
} now it is giving below result
{
"Data": [
{
"Broker name": "ITI Securities Broking Limited",
"Client ID": "8560017",
"Contract Note No.": "110071",
"Trade Date": "15-Nov-2023",
"Exchange/Clg. Corp": "NSE Capital Market",
"Order No.": "2023216",
"Order Time": "10:26:42",
"Trade No": "62293618",
"Trade Time": "10:26:42",
"Security/Contract Description": "TATA POWER CO LTD INE245A01021 N",
"ISIN CODE": "INE245A01021",
"Buy/Sell": "N",
"Quantity": 500,
"Gross Rate/Trade Price per unit (Rs) @": 260.9305,
"SEBI Turnover Fee": null,
"Brokerage per Unit (Rs)": null,
"Net Total (Before Levies) (Rs)": 130,
593.0463252999998. Please help
Beta Was this translation helpful? Give feedback.
All reactions