diff --git a/.gitignore b/.gitignore index 7342092..cf75843 100644 --- a/.gitignore +++ b/.gitignore @@ -74,4 +74,13 @@ app/frontend/ app/launch_streamlit.py # Virtual environment -.venv/ \ No newline at end of file +.venv/freeform_data_claude_* +row_data_claude_* +freeform_data_claude_* +housing_example.json +seeds_test.json +test.csv +sample_200x100.csv +Raw_Web_Visit_sample.csv +Raw_Web_Visit_Sample.csv +Raw_Web_Visit_Sample.csv diff --git a/Raw_Web_Visit_Sample.csv b/Raw_Web_Visit_Sample.csv new file mode 100644 index 0000000..2e2e28e Binary files /dev/null and b/Raw_Web_Visit_Sample.csv differ diff --git a/app/core/config.py b/app/core/config.py index ea8d6f1..777a6e4 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Any from pydantic import BaseModel from fastapi import FastAPI, HTTPException, Request, status import requests @@ -14,6 +14,9 @@ class UseCase(str, Enum): CODE_GENERATION = "code_generation" TEXT2SQL = "text2sql" CUSTOM = "custom" + LENDING_DATA = "lending_data" + #HOUSING_DATA = "housing_data" + CREDIT_CARD_DATA = "credit_card_data" class Technique(str, Enum): SFT = "sft" @@ -46,10 +49,18 @@ class UseCaseMetadata(BaseModel): """Metadata for each use case""" name: str description: str - topics: Dict[str, TopicMetadata] - default_examples: List[Dict[str, str]] + topics: list[str] + default_examples: List[Dict[str, Any]] + prompt: Optional[str] = None schema: Optional[str] = None +class UseCaseMetadataEval(BaseModel): + """Metadata for each use case""" + name: str + default_examples: List[Dict[str, Any]] + prompt: Optional[str] = None + + DEFAULT_SQL_SCHEMA = """ CREATE TABLE employees ( @@ -77,29 +88,7 @@ class UseCaseMetadata(BaseModel): UseCase.CODE_GENERATION: UseCaseMetadata( name="Code Generation", description="Generate programming questions and solutions with code examples", - topics={ - "python_basics": TopicMetadata( - name="Python Basics", - description="Fundamental Python programming concepts", - example_questions=[ - { - "question": "How do you create a list in Python and add elements to it?", - "solution": "Here's how to create and modify a list in Python:\n\n```python\n# Create an empty list\nmy_list = []\n\n# Add elements using append\nmy_list.append(1)\nmy_list.append(2)\n\n# Create a list with initial elements\nmy_list = [1, 2, 3]\n```" - } - ] - ), - "data_structures": TopicMetadata( - name="Data Structures", - description="Common data structures implementation and usage", - example_questions=[ - { - "question": "How do you implement a stack using a list in Python?", - "solution": "Here's how to implement a basic stack:\n\n```python\nclass Stack:\n def __init__(self):\n self.items = []\n \n def push(self, item):\n self.items.append(item)\n \n def pop(self):\n if not self.is_empty():\n return self.items.pop()\n \n def is_empty(self):\n return len(self.items) == 0\n```" - } - ] - ), - # Add more topics... 
- }, + topics=["Python Basics", "Data Manipulation", "Web Development", "Machine Learning", "Algorithms"], default_examples=[ { "question": "How do you read a CSV file into a pandas DataFrame?", @@ -110,36 +99,29 @@ class UseCaseMetadata(BaseModel): "solution": "Here's how to define a function:\n\n```python\ndef greet(name):\n return f'Hello, {name}!'\n\n# Example usage\nresult = greet('Alice')\nprint(result) # Output: Hello, Alice!\n```" } ], - - schema=None + prompt= """ + Requirements: + - Each solution must include working code examples + - Include explanations with the code + - Follow the same format as the examples + - Ensure code is properly formatted with appropriate indentation + - Each object MUST have exactly these two fields: + - "question" + - "solution" + """, + schema=None ), - + UseCase.TEXT2SQL: UseCaseMetadata( name="Text to SQL", description="Generate natural language to SQL query pairs", - topics={ - "basic_queries": TopicMetadata( - name="Basic Queries", - description="Simple SELECT, INSERT, UPDATE, and DELETE operations", - example_questions=[ - { - "question": "How do you select all employees from the employees table?", - "solution": "Here's the SQL query:\n```sql\nSELECT *\nFROM employees;\n```" - } - ] - ), - "joins": TopicMetadata( - name="Joins", - description="Different types of JOIN operations", - example_questions=[ - { - "question": "How do you join employees and departments tables to get employee names with their department names?", - "solution": "Here's the SQL query:\n```sql\nSELECT e.name, d.department_name\nFROM employees e\nJOIN departments d ON e.department_id = d.id;\n```" - } - ] - ), - # Add more topics... - }, + topics=[ + "Basic Queries", + "Joins", + "Aggregations", + "Subqueries", + "Windows Functions" + ], default_examples=[ { "question": "Find all employees with salary greater than 50000", @@ -150,11 +132,796 @@ class UseCaseMetadata(BaseModel): "solution": "```\nSELECT department_id, AVG(salary) as avg_salary\nFROM employees\nGROUP BY department_id;\n```" } ], + prompt = """ + Requirements: + - Each solution must be a working SQL query + - Include explanations where needed + - Follow the same format as the examples + - Ensure queries are properly formatted + - Each object MUST have exactly these two fields: + - "question" + - "solution" + """, + schema=DEFAULT_SQL_SCHEMA + ), + + UseCase.CUSTOM: UseCaseMetadata( + name="Custom", + description="Custom use case for user-defined data generation", + topics=[], + default_examples=[], + prompt = " ", + schema=None + ), + + UseCase.LENDING_DATA: UseCaseMetadata( + name="Lending Data", + description="Generate synthetic lending data", + topics=['Business loans', 'Personal loans', 'Auto loans', 'Home equity loans', "Asset-backed loans"], + default_examples=[ + { + "loan_amnt": 10000.00, + "term": "36 months", + "int_rate": 11.44, + "installment": 329.48, + "grade": "B", + "sub_grade": "B4", + "emp_title": "Marketing", + "emp_length": "10+ years", + "home_ownership": "RENT", + "annual_inc": 117000.00, + "verification_status": "Not Verified", + "issue_d": "Jan-2015", + "loan_status": "Fully Paid", + "purpose": "vacation", + "title": "Vacation", + "dti": 26.24, + "earliest_cr_line": "Jun-1990", + "open_acc": 16.00, + "pub_rec": 0.00, + "revol_bal": 36369.00, + "revol_util": 41.80, + "total_acc": 25.00, + "initial_list_status": "w", + "application_type": "INDIVIDUAL", + "mort_acc": 0.00, + "pub_rec_bankruptcies": 0.00, + "address": "0185 Michelle Gateway\r\nMendozaberg, OK 22690" + }, + { + "loan_amnt": 
8000.00, + "term": "36 months", + "int_rate": 11.99, + "installment": 265.68, + "grade": "B", + "sub_grade": "B5", + "emp_title": "Credit analyst", + "emp_length": "4 years", + "home_ownership": "MORTGAGE", + "annual_inc": 65000.00, + "verification_status": "Not Verified", + "issue_d": "Jan-2015", + "loan_status": "Fully Paid", + "purpose": "debt_consolidation", + "title": "Debt consolidation", + "dti": 22.05, + "earliest_cr_line": "Jul-2004", + "open_acc": 17.00, + "pub_rec": 0.00, + "revol_bal": 20131.00, + "revol_util": 53.30, + "total_acc": 27.00, + "initial_list_status": "f", + "application_type": "INDIVIDUAL", + "mort_acc": 3.00, + "pub_rec_bankruptcies": 0.00, + "address": "1040 Carney Fort Apt. 347\r\nLoganmouth, SD 05113" + }, + { + "loan_amnt": 15600.00, + "term": "36 months", + "int_rate": 10.49, + "installment": 506.97, + "grade": "B", + "sub_grade": "B3", + "emp_title": "Statistician", + "emp_length": "< 1 year", + "home_ownership": "RENT", + "annual_inc": 43057.00, + "verification_status": "Source Verified", + "issue_d": "Feb-2015", + "loan_status": "Fully Paid", + "purpose": "credit_card", + "title": "Credit card refinancing", + "dti": 12.79, + "earliest_cr_line": "Aug-2007", + "open_acc": 13.00, + "pub_rec": 0.00, + "revol_bal": 11987.00, + "revol_util": 92.20, + "total_acc": 26.00, + "initial_list_status": "f", + "application_type": "INDIVIDUAL", + "mort_acc": 0.00, + "pub_rec_bankruptcies": 0.00, + "address": "87000 Mark Dale Apt. 269\r\nNew Sabrina, WV 05113" + }, + { + "loan_amnt": 24375.00, + "term": "60 months", + "int_rate": 17.27, + "installment": 609.33, + "grade": "C", + "sub_grade": "C5", + "emp_title": "Destiny Management Inc.", + "emp_length": "9 years", + "home_ownership": "MORTGAGE", + "annual_inc": 55000.00, + "verification_status": "Verified", + "issue_d": "Apr-2013", + "loan_status": "Charged Off", + "purpose": "credit_card", + "title": "Credit Card Refinance", + "dti": 33.95, + "earliest_cr_line": "Mar-1999", + "open_acc": 13.00, + "pub_rec": 0.00, + "revol_bal": 24584.00, + "revol_util": 69.80, + "total_acc": 43.00, + "initial_list_status": "f", + "application_type": "INDIVIDUAL", + "mort_acc": 1.00, + "pub_rec_bankruptcies": 0.00, + "address": "512 Luna Roads\r\nGreggshire, VA 11650" + } + ], + + prompt = """ + You need to create profile data for the LendingClub company which specialises in lending various types of loans to urban customers. + + + You need to generate the data in the same order for the following fields (description of each field is followed after the colon): + + loan_amnt: The listed amount of the loan applied for by the borrower. If at some point in time, the credit department reduces the loan amount, then it will be reflected in this value. + term: The number of payments on the loan. Values are in months and can be either 36 months or 60 months. + int_rate: Interest Rate on the loan + installment: The monthly payment owed by the borrower if the loan originates. + grade: LC assigned loan grade (Possible values: A, B, C, D, E, F, G) + sub_grade: LC assigned loan subgrade (Possible sub-values: 1-5 i.e A5) + emp_title: The job title supplied by the Borrower when applying for the loan. + emp_length: Employment length in years. Possible values are between 0 and 10 where 0 means less than one year and 10 means ten or more years. + home_ownership: The home ownership status provided by the borrower during registration or obtained from the credit report. 
Our values are: RENT, OWN, MORTGAGE, OTHER + annual_inc: The self-reported annual income provided by the borrower during registration. + verification_status: Indicates if income was verified by LC, not verified, or if the income source was verified + issue_d: The month which the loan was funded + loan_status: Current status of the loan + purpose: A category provided by the borrower for the loan request. + title: The loan title provided by the borrower + dti: A ratio calculated using the borrower’s total monthly debt payments on the total debt obligations, excluding mortgage and the requested LC loan, divided by the borrower’s self-reported monthly income. + earliest_cr_line: The month the borrower's earliest reported credit line was opened + open_acc: The number of open credit lines in the borrower's credit file. + pub_rec: Number of derogatory public records + revol_bal: Total credit revolving balance + revol_util: Revolving line utilization rate, or the amount of credit the borrower is using relative to all available revolving credit. + total_acc: The total number of credit lines currently in the borrower's credit file + initial_list_status: The initial listing status of the loan. Possible values are – W, F + application_type: Indicates whether the loan is an individual application or a joint application with two co-borrowers + mort_acc: Number of mortgage accounts. + pub_rec_bankruptcies: Number of public record bankruptcies + address: The physical address of the person + + Ensure PII from examples such as addresses are not used in the generated data to minimize any privacy concerns. + """, + schema=None + ), + + + UseCase.CREDIT_CARD_DATA: UseCaseMetadata( + name="Credit Card Data", + description="Synthetic data for credit card profile data", + topics=[ + "High income person", + "Low income person", + "Four-person family", + "Three-person family", + "Two-person family", + "Five-person family", + "more than 10 credit records", + "more than 20 credit records" + + ], + default_examples=[ + { + "ID": 100001, + "CODE_GENDER": "M", + "FLAG_OWN_CAR": "Y", + "FLAG_OWN_REALTY": "Y", + "CNT_CHILDREN": 2, + "AMT_INCOME_TOTAL": 85000, + "NAME_INCOME_TYPE": "Commercial associate", + "NAME_EDUCATION_TYPE": "Higher education", + "NAME_FAMILY_STATUS": "Married", + "NAME_HOUSING_TYPE": "House / apartment", + "DAYS_BIRTH": -12775, + "DAYS_EMPLOYED": -2890, + "FLAG_MOBIL": "Y", + "FLAG_WORK_PHONE": "Y", + "FLAG_PHONE": "Y", + "FLAG_EMAIL": "Y", + "OCCUPATION_TYPE": "Manager", + "CNT_FAM_MEMBERS": 4, + "CREDIT_RECORDS": [ + {"ID": 100001, "MONTHS_BALANCE": -24, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -23, "STATUS": "0"}, + {"ID": 100001, "MONTHS_BALANCE": -22, "STATUS": "1"}, + {"ID": 100001, "MONTHS_BALANCE": -21, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -20, "STATUS": "0"}, + {"ID": 100001, "MONTHS_BALANCE": -19, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -18, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -17, "STATUS": "0"}, + {"ID": 100001, "MONTHS_BALANCE": -16, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -15, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -14, "STATUS": "0"}, + {"ID": 100001, "MONTHS_BALANCE": -13, "STATUS": "1"}, + {"ID": 100001, "MONTHS_BALANCE": -12, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -11, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -10, "STATUS": "0"}, + {"ID": 100001, "MONTHS_BALANCE": -9, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -8, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -7, "STATUS": 
"0"}, + {"ID": 100001, "MONTHS_BALANCE": -6, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -5, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": -4, "STATUS": "0"}, + {"ID": 100001, "MONTHS_BALANCE": -3, "STATUS": "0"}, + {"ID": 100001, "MONTHS_BALANCE": -2, "STATUS": "1"}, + {"ID": 100001, "MONTHS_BALANCE": -1, "STATUS": "C"}, + {"ID": 100001, "MONTHS_BALANCE": 0, "STATUS": "C"} + ] + }, + { + "ID": 100002, + "CODE_GENDER": "F", + "FLAG_OWN_CAR": "N", + "FLAG_OWN_REALTY": "N", + "CNT_CHILDREN": 0, + "AMT_INCOME_TOTAL": 42000, + "NAME_INCOME_TYPE": "Working", + "NAME_EDUCATION_TYPE": "Secondary / secondary special", + "NAME_FAMILY_STATUS": "Single / not married", + "NAME_HOUSING_TYPE": "Rented apartment", + "DAYS_BIRTH": -9850, + "DAYS_EMPLOYED": -1825, + "FLAG_MOBIL": "Y", + "FLAG_WORK_PHONE": "N", + "FLAG_PHONE": "Y", + "FLAG_EMAIL": "Y", + "OCCUPATION_TYPE": "Sales staff", + "CNT_FAM_MEMBERS": 1, + "CREDIT_RECORDS": [ + {"ID": 100002, "MONTHS_BALANCE": -18, "STATUS": "X"}, + {"ID": 100002, "MONTHS_BALANCE": -17, "STATUS": "X"}, + {"ID": 100002, "MONTHS_BALANCE": -16, "STATUS": "0"}, + {"ID": 100002, "MONTHS_BALANCE": -15, "STATUS": "1"}, + {"ID": 100002, "MONTHS_BALANCE": -14, "STATUS": "2"}, + {"ID": 100002, "MONTHS_BALANCE": -13, "STATUS": "3"}, + {"ID": 100002, "MONTHS_BALANCE": -12, "STATUS": "C"}, + {"ID": 100002, "MONTHS_BALANCE": -11, "STATUS": "0"}, + {"ID": 100002, "MONTHS_BALANCE": -10, "STATUS": "C"}, + {"ID": 100002, "MONTHS_BALANCE": -9, "STATUS": "0"}, + {"ID": 100002, "MONTHS_BALANCE": -8, "STATUS": "1"}, + {"ID": 100002, "MONTHS_BALANCE": -7, "STATUS": "C"}, + {"ID": 100002, "MONTHS_BALANCE": -6, "STATUS": "0"}, + {"ID": 100002, "MONTHS_BALANCE": -5, "STATUS": "C"}, + {"ID": 100002, "MONTHS_BALANCE": -4, "STATUS": "0"}, + {"ID": 100002, "MONTHS_BALANCE": -3, "STATUS": "0"}, + {"ID": 100002, "MONTHS_BALANCE": -2, "STATUS": "1"}, + {"ID": 100002, "MONTHS_BALANCE": -1, "STATUS": "2"}, + {"ID": 100002, "MONTHS_BALANCE": 0, "STATUS": "C"} + ] + }, + { + "ID": 100003, + "CODE_GENDER": "M", + "FLAG_OWN_CAR": "Y", + "FLAG_OWN_REALTY": "Y", + "CNT_CHILDREN": 1, + "AMT_INCOME_TOTAL": 95000, + "NAME_INCOME_TYPE": "State servant", + "NAME_EDUCATION_TYPE": "Higher education", + "NAME_FAMILY_STATUS": "Married", + "NAME_HOUSING_TYPE": "House / apartment", + "DAYS_BIRTH": -15330, + "DAYS_EMPLOYED": -4380, + "FLAG_MOBIL": "Y", + "FLAG_WORK_PHONE": "Y", + "FLAG_PHONE": "Y", + "FLAG_EMAIL": "Y", + "OCCUPATION_TYPE": "Core staff", + "CNT_FAM_MEMBERS": 3, + "CREDIT_RECORDS": [ + {"ID": 100003, "MONTHS_BALANCE": -36, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -35, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -34, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -33, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -32, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -31, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -30, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -29, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -28, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -27, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -26, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -25, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -24, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -23, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -22, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -21, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -20, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -19, "STATUS": "C"}, + {"ID": 100003, 
"MONTHS_BALANCE": -18, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -17, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -16, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -15, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -14, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -13, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -12, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -11, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -10, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -9, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -8, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -7, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -6, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -5, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -4, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -3, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -2, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": -1, "STATUS": "C"}, + {"ID": 100003, "MONTHS_BALANCE": 0, "STATUS": "C"} + ] + }, + { + "ID": 100004, + "CODE_GENDER": "F", + "FLAG_OWN_CAR": "N", + "FLAG_OWN_REALTY": "N", + "CNT_CHILDREN": 3, + "AMT_INCOME_TOTAL": 28000, + "NAME_INCOME_TYPE": "Pensioner", + "NAME_EDUCATION_TYPE": "Secondary / secondary special", + "NAME_FAMILY_STATUS": "Widow/Widower", + "NAME_HOUSING_TYPE": "Rented apartment", + "DAYS_BIRTH": -23725, + "DAYS_EMPLOYED": 365, + "FLAG_MOBIL": "Y", + "FLAG_WORK_PHONE": "N", + "FLAG_PHONE": "N", + "FLAG_EMAIL": "N", + "OCCUPATION_TYPE": "Pensioner", + "CNT_FAM_MEMBERS": 4, + "CREDIT_RECORDS": [ + {"ID": 100004, "MONTHS_BALANCE": -12, "STATUS": "0"}, + {"ID": 100004, "MONTHS_BALANCE": -11, "STATUS": "1"}, + {"ID": 100004, "MONTHS_BALANCE": -10, "STATUS": "2"}, + {"ID": 100004, "MONTHS_BALANCE": -9, "STATUS": "3"}, + {"ID": 100004, "MONTHS_BALANCE": -8, "STATUS": "4"}, + {"ID": 100004, "MONTHS_BALANCE": -7, "STATUS": "5"}, + {"ID": 100004, "MONTHS_BALANCE": -6, "STATUS": "5"}, + {"ID": 100004, "MONTHS_BALANCE": -5, "STATUS": "5"}, + {"ID": 100004, "MONTHS_BALANCE": -4, "STATUS": "5"}, + {"ID": 100004, "MONTHS_BALANCE": -3, "STATUS": "5"}, + {"ID": 100004, "MONTHS_BALANCE": -2, "STATUS": "5"}, + {"ID": 100004, "MONTHS_BALANCE": -1, "STATUS": "5"}, + {"ID": 100004, "MONTHS_BALANCE": 0, "STATUS": "X"} + ] + } +], + prompt= """ + + Generate synthetic data for a credit card dataset. Here is the context about the dataset: + + Credit score cards are a common risk control method in the financial industry. It uses personal information and data submitted by credit card applicants to predict the probability of future defaults and credit card borrowings. The bank is able to decide whether to issue a credit card to the applicant. Credit scores can objectively quantify the magnitude of risk. + Generally speaking, credit score cards are based on historical data. Once encountering large economic fluctuations. Past models may lose their original predictive power. Logistic model is a common method for credit scoring. Because Logistic is suitable for binary classification tasks and can calculate the coefficients of each feature. In order to facilitate understanding and operation, the score card will multiply the logistic regression coefficient by a certain value (such as 100) and round it. + At present, with the development of machine learning algorithms. More predictive methods such as Boosting, Random Forest, and Support Vector Machines have been introduced into credit card scoring. However, these methods often do not have good transparency. 
It may be difficult to provide customers and regulators with a reason for rejection or acceptance. + + + The dataset consists of two tables: `User Records` and `Credit Records`, merged by `ID`. The output must create field values with the following specifications: + + User Records Fields (static per user): + - ID: Unique client number (e.g., 100001, 100002). + - CODE_GENDER: Gender ('F' or 'M'). + - FLAG_OWN_CAR: Car ownership ('Y' or 'N'). + - FLAG_OWN_REALTY: Property ownership ('Y' or 'N'). + - CNT_CHILDREN`: Number of children (0 or more). + - AMT_INCOME_TOTAL`: Annual income. + - NAME_INCOME_TYPE`: Income category (e.g., 'Commercial associate', 'State servant'). + - NAME_EDUCATION_TYPE`: Education level (e.g., 'Higher education', 'Secondary'). + - NAME_FAMILY_STATUS`: Marital status (e.g., 'Married', 'Single'). + - NAME_HOUSING_TYPE`: Way of living. + - DAYS_BIRTH`: Birthday Count backwards from current day (0), -1 means yesterday. + - DAYS_EMPLOYED: Start date of employment Count backwards from current day(0). If positive, it means the person currently unemployed. (negative for employed; positive for unemployed). + - FLAG_MOBIL: Is there a mobile phone ('Y'/'N') + - FLAG_WORK_PHONE: Is there a work phone ('Y'/'N') + - FLAG_PHONE: Is there a phone ('Y'/'N') + - FLAG_EMAIL: Is there an email ('Y'/'N') + - OCCUPATION_TYPE: Occupation (e.g., 'Manager', 'Sales staff'). + - CNT_FAM_MEMBERS: Family size (1 or more). + + Credit records Fields (nested array): + - ID: needs to be the same as the User Records Fields ID. + - MONTHS_BALANCE: Refers to Record month. The month of the extracted data is the starting point, backwards, 0 is the current month, -1 is the previous month, and so on. + - STATUS: + Must be one of ['0', '1', '2', '3', '4', '5', 'C', 'X']. + Values description: 0: 1-29 days past due 1: 30-59 days past due 2: 60-89 days overdue 3: 90-119 days overdue 4: 120-149 days overdue 5: Overdue or bad debts, write-offs for more than 150 days C: paid off that month X: No loan for the month + + + 3. Requirements: + - Consistency: Ensure `ID` consistency between the application and its nested credit records. + - Avoid real personal data (use synthetic values). + - Format output as three separate JSON objects, each with the structure shown in the examples. + + When generating the data, make sure to adhere to the following guidelines: + + Privacy guidelines: + - Avoid real PII. + - Ensure examples are not leaked into the synthetic data + + Cross-row entries guidelines (applies to Credit Records): + - Entries must be ordered from oldest (`MONTHS_BALANCE=-60`) to newest (`MONTHS_BALANCE=0`). + - No duplicate `MONTHS_BALANCE` values for a single client. + - The time-series credit record entries need to be logical and consistent when read in the correct sequence. + - Ensure there are no other cross-row Credit Records inconsistencies not listed above. + + Formatting guidelines: + - `CNT_CHILDREN`, `AMT_INCOME_TOTAL`, `DAYS_BIRTH`, `DAYS_EMPLOYED`, etc., must be integers. + - `MONTHS_BALANCE` must be an integer 0 or less. + - Ensure no other formatting problems or inconsistencies appear that are not listed above. + + Cross-row entries guidelines (applies to Credit Records): + - Entries must be ordered from oldest (`MONTHS_BALANCE=-60`) to newest (`MONTHS_BALANCE=0`). + - No duplicate `MONTHS_BALANCE` values for a single client. + - If a Recent `MONTHS_BALANCE` is 0 there should be an "X" (no loan) or "C" (paid off). 
+ - The time-series credit record entries need to be logical and consistent when read in the correct sequence. (e.g. delinquencies can appear in progression as "0" → "1" → "2" as months progress from "-2" → "-1" → "0" etc). + - Ensure there are no other Credit Records inconsistencies appear that not listed above. + + + Cross-Column guidelines: + - Check cross-column inconsistencies such as: + If `FLAG_OWN_REALTY="Y"`, `NAME_HOUSING_TYPE` must **not** be "Rented apartment". + If `DAYS_EMPLOYED > 0` (unemployed), `AMT_INCOME_TOTAL` should be lower (e.g., ≤ $50,000). + `OCCUPATION_TYPE` must align with `NAME_INCOME_TYPE` (e.g., "Pensioner" cannot have "Manager" as occupation). + `CNT_FAM_MEMBERS` ≥ `CNT_CHILDREN` + 1 (accounting for at least one parent). + - Ensure there are no other cross-field Credit Records inconsistencies appear that are not listed above. + + + """, + + schema=None + ), +} + + +USE_CASE_CONFIGS_EVALS = { + UseCase.CODE_GENERATION: UseCaseMetadataEval( + name="Code Generation", + default_examples=[ + { + "score": 3, + "justification": """The code achieves 3 points by implementing core functionality correctly (1), + showing generally correct implementation with proper syntax (2), + and being suitable for professional use with good Python patterns and accurate functionality (3). + While it demonstrates competent development practices, it lacks the robust error handling + and type hints needed for point 4, and could benefit from better efficiency optimization and code organization.""" + }, + { + "score": 4, + "justification": """ + The code earns 4 points by implementing basic functionality (1), showing correct implementation (2), + being production-ready (3), and demonstrating high efficiency with Python best practices + including proper error handling, type hints, and clear documentation (4). + It exhibits experienced developer qualities with well-structured code and maintainable design, though + it lacks the comprehensive testing and security considerations needed for a perfect score.""" + } + ], + prompt= """Below is a Python coding Question and Solution pair generated by an LLM. Evaluate its quality as a Senior Developer would, considering its suitability for professional use. Use the additive 5-point scoring system described below. + +Points are accumulated based on the satisfaction of each criterion: + 1. Add 1 point if the code implements basic functionality and solves the core problem, even if it includes some minor issues or non-optimal approaches. + 2. Add another point if the implementation is generally correct but lacks refinement in style or fails to follow some best practices. It might use inconsistent naming conventions or have occasional inefficiencies. + 3. Award a third point if the code is appropriate for professional use and accurately implements the required functionality. It demonstrates good understanding of Python concepts and common patterns, though it may not be optimal. It resembles the work of a competent developer but may have room for improvement in efficiency or organization. + 4. Grant a fourth point if the code is highly efficient and follows Python best practices, exhibiting consistent style and appropriate documentation. It could be similar to the work of an experienced developer, offering robust error handling, proper type hints, and effective use of built-in features. The result is maintainable, well-structured, and valuable for production use. + 5. 
Bestow a fifth point if the code is outstanding, demonstrating mastery of Python and software engineering principles. It includes comprehensive error handling, efficient algorithms, proper testing considerations, and excellent documentation. The solution is scalable, performant, and shows attention to edge cases and security considerations. +""" - schema= DEFAULT_SQL_SCHEMA - ) + ), + + UseCase.TEXT2SQL: UseCaseMetadataEval( + name="Text to SQL", + + default_examples=[ { + "score": 3, + "justification": """The query earns 3 points by successfully retrieving basic data (1), + showing correct logical implementation (2), and being suitable for + professional use with accurate data retrieval and good SQL pattern understanding (3). + However, it lacks efficiency optimizations and consistent style conventions needed for + point 4, using basic JOINs without considering indexing or performance implications. + While functional, the query would benefit from better organization and efficiency improvements.""" + }, + + { + "score": 4, + "justification": """The query merits 4 points by retrieving basic data correctly (1), implementing proper + logic (2), being production-ready (3), and demonstrating high efficiency with proper + indexing considerations, well-structured JOINs, and consistent formatting (4). It + shows experienced developer qualities with appropriate commenting and performant SQL + features, though it lacks the comprehensive NULL handling and execution plan optimization needed for a + perfect score.""" + } + ], + prompt = """Below is a SQL Query Question and Solution pair generated by an LLM. Evaluate its quality as a Senior Database Developer would, considering its suitability for professional use. Use the additive 5-point scoring system described below. + + Points are accumulated based on the satisfaction of each criterion: + 1. Add 1 point if the query retrieves the basic required data, even if it includes some minor issues or non-optimal approaches. + 2. Add another point if the query is generally correct but lacks refinement in style or fails to follow some best practices. It might use inconsistent naming or have occasional inefficiencies. + 3. Award a third point if the query is appropriate for professional use and accurately retrieves the required data. It demonstrates good understanding of SQL concepts and common patterns, though it may not be optimal. It resembles the work of a competent database developer but may have room for improvement in efficiency or organization. + 4. Grant a fourth point if the query is highly efficient and follows SQL best practices, exhibiting consistent style and appropriate commenting. It could be similar to the work of an experienced developer, offering proper indexing considerations, efficient joins, and effective use of SQL features. The result is performant, well-structured, and valuable for production use. + 5. Bestow a fifth point if the query is outstanding, demonstrating mastery of SQL and database principles. It includes optimization for large datasets, proper handling of NULL values, consideration for execution plans, and excellent documentation. The solution is scalable, performs well, and shows attention to edge cases and data integrity. + """ + ), + + UseCase.CUSTOM: UseCaseMetadataEval( + name="Custom", + + default_examples=[ + { + "score": 1, + "justification": "The response demonstrates basic understanding but lacks depth and detail. 
While it provides minimal relevant information, significant improvement is needed in comprehensiveness, accuracy, and overall quality." + }, + { + "score": 2, + "justification": "The response shows moderate understanding with some relevant details. While key points are addressed, there are gaps in thoroughness and depth that could be improved for better quality and comprehensiveness." + }, + { + "score": 3, + "justification": "The response demonstrates good understanding (1), provides accurate information (2), and shows solid comprehension of the subject matter (3). While it effectively addresses the main points, it could benefit from more detailed analysis and supporting examples." + }, + { + "score": 4, + "justification": "The response excels by showing thorough understanding (1), providing comprehensive details (2), demonstrating clear analysis (3), and offering well-supported insights with relevant examples (4). It effectively addresses all key aspects while maintaining clarity and depth throughout." + }, + { + "score": 5, + "justification": "The response achieves excellence through exceptional understanding (1), comprehensive coverage (2), insightful analysis (3), well-supported arguments (4), and outstanding presentation with compelling examples and thorough explanations (5). It represents a complete and authoritative treatment of the subject." + } + ] + , + prompt = " ", + + ), + + UseCase.LENDING_DATA: UseCaseMetadataEval( + name="Lending Data", + + default_examples=[{ + 'score':10, + 'justification': ''' + 1. Privacy Compliance: No PII leakage detected. (No deductions). + 2. Formatting Consistency: + - Decimal precision (e.g., "10000.00", "12.05%") is correctly applied. + - Dates follow "Jan-YYYY" format. + - Term includes a space before the numeric value (e.g., " 36 months"). + - Zipcode and state alignment adheres to guidelines. (No deductions). + 3. Cross-Column Consistency: + - Grade/Subgrade Alignment: Subgrades (A5, B2, C4) align with their respective grades. + - Interest Rate vs. Grade/Subgrade: Rates increase with lower grades (e.g., 12.05% for A5 vs. 18.5% for C4). + - Mortgage Consistency: `mort_acc` matches `home_ownership` (e.g., MORTGAGE → `mort_acc=1`, OWN → `mort_acc=0`). + - Open vs. Total Accounts: `open_acc` ≤ `total_acc` in all records. + - No inconsistencies detected. (No deductions). + 4. Background Knowledge/Realism: + - Interest rates (12–18.5%) align with real-world lending practices at the issuance date. + - Loan terms (36/60 months), employment lengths (0–10 years), and DTI ratios (15–25%) are realistic. (No deductions). + 5. Other Violations: None identified. + + Final Rating: 10/10. The data adheres to all guidelines, with no critical errors or inconsistencies. + ''' + } + ], + prompt = """ + + Evaluate the given data for the LendingClub company which specialises in lending various types of loans to urban customers. + + Background: + LendingClub is a peer-to-peer lending platform connecting borrowers with investors. The dataset captures loan applications, + borrower profiles, and outcomes to assess credit risk, predict defaults, and determine interest rates. + + + + Each generated record must include the following defined fields in the exact order provided, with values generated. + + Record Field Definitions: + - loan_amnt: The listed amount of the loan applied for by the borrower. If at some point in time, the credit department + reduces the loan amount, then it will be reflected in this value. + - term: The number of payments on the loan. 
Values are in months and can be either " 36 months" or " 60 months". + - int_rate: Interest Rate on the loan + - installment: The monthly payment owed by the borrower if the loan originates. + - grade: LC assigned loan grade (Possible values: A, B, C, D, E, F, G) + - sub_grade: LC assigned loan subgrade (Possible sub-values: 1-5 i.e. A5) + - emp_title: The job title supplied by the Borrower when applying for the loan. + - emp_length: Employment length in years. Possible values are between 0 and 10 where 0 means less than one year and 10 + means ten or more years. + - home_ownership: The home ownership status provided by the borrower during registration or obtained from the credit report. + Possible values are: RENT, OWN, MORTGAGE, ANY, OTHER + - annual_inc: The self-reported annual income provided by the borrower during registration. + - verification_status: Indicates if income was verified by LC, not verified, or if the income source was verified + - issue_d: The month which the loan was funded + - loan_status: Current status of the loan (Possible values: "Fully Paid", "Charged Off") + - purpose: A category provided by the borrower for the loan request. + - title: The loan title provided by the borrower + - dti: A ratio calculated using the borrower’s total monthly debt payments on the total debt obligations, excluding mortgage + and the requested LC loan, divided by the borrower’s self-reported monthly income. + - earliest_cr_line: The month the borrower's earliest reported credit line was opened + - open_acc: The number of open credit lines in the borrower's credit file. + - pub_rec: Number of derogatory public records + - revol_bal: Total credit revolving balance + - revol_util: Revolving line utilization rate, or the amount of credit the borrower is using relative to all available + revolving credit. + - total_acc: The total number of credit lines currently in the borrower's credit file + - initial_list_status: The initial listing status of the loan. Possible values are: w, f + - application_type: Indicates whether the loan is an individual application or a joint application with two co-borrowers + - mort_acc: Number of mortgage accounts. + - pub_rec_bankruptcies: Number of public record bankruptcies + - address: The physical address of the person + + + + + In addition to the definitions above, when evaluating the data samples, make sure the data adhere to following guidelines: + + Privacy Compliance guidelines: + 1) Allow PII reducted addresses to ensure privacy is maintained. Also, ensure the records address's zipcode and state match the given values in the seed instructions. + + Formatting guidelines: + 1) Check for consistent decimal precision. + 2) Ensure dates (e.g. issue_d, earliest_cr_line) follow the "Jan-YYYY" format. + 3) Validate that term has space before the number of months (i.e. " 36 months") + 4) State zipcode needs to be exactly as specified in the seed instructions. The persons address must follow the format as specified in the examples with the State zipcode coming last. + 5) Any other formatting guidelines that can be inferred from the examples or field definitions but are not listed above. + + Cross-column guidelines: + 1) Check for logical and realistic consistency and correlations between variables. Examples include but not limited to: + a) Grade/Sub-grade consistency: Sub-grade must match the grade (e.g., "B" grade → "B1" to "B5" possible subgrades). 
+ b) Interest Rate vs Grade/Subgrade relationship: Higher subgrades (e.g., A5) could have higher `int_rate` than lower subgrades (e.g., A3). + c) Mortgage Consistency: `mort_acc` should be 1 or more if `home_ownership` is `MORTGAGE`. + d) Open Accounts: `open_acc` ≤ `total_acc`. + Note: Do not deduct points based on the installment amount and its relationship with interest rate, loan amount, and term. The relationship has already been verified. + + Data distribution guidelines: + 1) Check if the generated values are statistically possible and within any ranges given the parameters defined in the seed instructions. + + Background knowledge and realism guidelines: + 1) Ensure fields such as interest rates reflect real-world interest rates at the time the loan is issued. + 2) Check all generated values if they are plausible given real-world background information. + + + + Scoring Workflow: + 1. Start at 10 points and deduct points for each violation: + - Privacy Compliance: -1 point for any violations related to privacy guidelines. + - Formatting: -1 point for any violations related to formatting inconsistencies. + - Cross-column: -4 points for any violations related to Cross-column inconsistencies. + - Background knowledge and realism: -1 point for any violations related to Background knowledge and realism inconsistencies. + Note: Allow made-up PII information without deducting points. + - Other violations: -2 points for any other violations, inconsistencies that you detect but are not listed above. + 2. Cap score at 1 if any critical errors (e.g., PII leakage, missing fields). + 3. Give a score rating 1-10 for the given data. If there are more than 9 points to subtract, use 1 as the absolute minimum score. + 4. List all scoring justifications as a list. + """ + ), + + UseCase.CREDIT_CARD_DATA: UseCaseMetadataEval( + name="Credit Card Data", + + default_examples=[ + { + "score": 10, + "justification": """- No privacy violations detected (no PII leakage). + - All fields adhere to formatting requirements (integers where required, valid `MONTHS_BALANCE`, etc.). + - Cross-row entries are ordered correctly, no duplicates, and statuses progress logically (e.g., "0" → "1" → "2"). + - Cross-column consistency: + - `FLAG_OWN_REALTY="Y"` aligns with `NAME_HOUSING_TYPE`. + - Unemployed (`DAYS_EMPLOYED > 0`) have lower incomes. + - `OCCUPATION_TYPE` matches `NAME_INCOME_TYPE`. + - `CNT_FAM_MEMBERS` ≥ `CNT_CHILDREN` + 1. + - No other critical errors. + """ + + } + ], + prompt = """ + Evaluate the quality of the provided synthetic credit data and return a score between 1 and 10. The score should reflect how well the data adheres to the following criteria: + + Here is the context about the dataset: + + Credit score cards are a common risk control method in the financial industry. It uses personal information and data submitted by credit card applicants to predict the probability of future defaults and credit card borrowings. The bank is able to decide whether to issue a credit card to the applicant. Credit scores can objectively quantify the magnitude of risk. + Generally speaking, credit score cards are based on historical data. Once encountering large economic fluctuations. Past models may lose their original predictive power. Logistic model is a common method for credit scoring. Because Logistic is suitable for binary classification tasks and can calculate the coefficients of each feature.
In order to facilitate understanding and operation, the score card will multiply the logistic regression coefficient by a certain value (such as 100) and round it. + At present, with the development of machine learning algorithms. More predictive methods such as Boosting, Random Forest, and Support Vector Machines have been introduced into credit card scoring. However, these methods often do not have good transparency. It may be difficult to provide customers and regulators with a reason for rejection or acceptance. + + + The dataset consists of two tables: `User Records` and `Credit Records`, merged by `ID`. The output must create field values with the following specifications: + + User Records Fields (static per user): + - ID: Unique client number (e.g., 100001, 100002). + - CODE_GENDER: Gender ('F' or 'M'). + - FLAG_OWN_CAR: Car ownership ('Y' or 'N'). + - FLAG_OWN_REALTY: Property ownership ('Y' or 'N'). + - CNT_CHILDREN: Number of children (0 or more). + - AMT_INCOME_TOTAL: Annual income. + - NAME_INCOME_TYPE: Income category (e.g., 'Commercial associate', 'State servant'). + - NAME_EDUCATION_TYPE: Education level (e.g., 'Higher education', 'Secondary'). + - NAME_FAMILY_STATUS: Marital status (e.g., 'Married', 'Single'). + - NAME_HOUSING_TYPE: Way of living. + - DAYS_BIRTH: Birthday, counted backwards from the current day (0); -1 means yesterday. + - DAYS_EMPLOYED: Start date of employment, counted backwards from the current day (0). If positive, it means the person is currently unemployed. (negative for employed; positive for unemployed). + - FLAG_MOBIL: Is there a mobile phone ('Y'/'N') + - FLAG_WORK_PHONE: Is there a work phone ('Y'/'N') + - FLAG_PHONE: Is there a phone ('Y'/'N') + - FLAG_EMAIL: Is there an email ('Y'/'N') + - OCCUPATION_TYPE: Occupation (e.g., 'Manager', 'Sales staff'). + - CNT_FAM_MEMBERS: Family size (1 or more). + + Credit Records Fields (nested array): + - ID: needs to be the same as the User Records Fields ID. + - MONTHS_BALANCE: Refers to Record month. The month of the extracted data is the starting point, backwards, 0 is the current month, -1 is the previous month, and so on. + - STATUS: + Must be one of ['0', '1', '2', '3', '4', '5', 'C', 'X']. + Values description: 0: 1-29 days past due 1: 30-59 days past due 2: 60-89 days overdue 3: 90-119 days overdue 4: 120-149 days overdue 5: Overdue or bad debts, write-offs for more than 150 days C: paid off that month X: No loan for the month + + + Evaluate whether the data adhere to the following guidelines: + + Privacy guidelines: + - Allow fictitious PII entries that do not leak real PII. + + Formatting guidelines: + - `CNT_CHILDREN`, `AMT_INCOME_TOTAL`, `DAYS_BIRTH`, `DAYS_EMPLOYED`, etc., must be integers. + - `MONTHS_BALANCE` must be an integer 0 or less. + - Ensure no other formatting problems or inconsistencies appear that are not listed above. + + Cross-row entries guidelines (applies to Credit Records): + - Entries must be ordered from oldest (e.g. `MONTHS_BALANCE=-60`) to newest (`MONTHS_BALANCE=0`). + - No duplicate `MONTHS_BALANCE` values for a single client. + - Consecutive STATUS=C is allowed since it indicates that each monthly payment and amount owed is paid off. + - The time-series credit record entries need to be logical and consistent when read in the correct sequence as months progress from negative to 0. + - Ensure the records don't start from delinquency 2 but rather from 0, C or X. + - Ensure there are no other Credit Records inconsistencies that are not listed above.
+ + + Cross-Column guidelines: + - Check cross-column inconsistencies such as: + If `FLAG_OWN_REALTY="Y"`, `NAME_HOUSING_TYPE` must **not** be "Rented apartment". + If `DAYS_EMPLOYED > 0` (unemployed), `AMT_INCOME_TOTAL` should be lower (e.g., ≤ $50,000). + `OCCUPATION_TYPE` must align with `NAME_INCOME_TYPE` (e.g., "Pensioner" cannot have "Manager" as occupation). + `CNT_FAM_MEMBERS` ≥ `CNT_CHILDREN` + 1 (accounting for at least one parent). + DAYS_BIRTH, DAYS_EMPLOYED, OCCUPATION_TYPE and other variables are reasonable when considered together. + - Ensure there are no other cross-field Credit Records inconsistencies appear that are not listed above. + + + Scoring Workflow: + Start at 10, deduct points for violations: + Subtract 2 points for any Privacy guidelines violations. + Subtract 1 point for any formatting guidelines violations. + Subtract 1 point for any cross-column violations. + Subtract 4 points for any Cross-row guidelines guidelines violations. + Subtract 2 points for any other problem with the generated data not listed above. + Cap minimum score score at 1 if any critical errors (e.g., missing `ID`, PII, or invalid `STATUS`). + + + Give a score rating 1-10 for the given data. If there are more than 9 points to subtract use 1 as the absolute minimum scoring. List all justification as list. + """ + ) } + + # Model configurations MODEL_CONFIGS = { ModelID.CLAUDE_V2: {"max_tokens": 100000, "max_input_tokens": 100000}, @@ -284,7 +1051,7 @@ def get_examples_for_topic(use_case: UseCase, topic: str) -> List[Dict[str, str] } } } - +from pathlib import Path JWT_PATH = Path("/tmp/jwt") def _get_caii_token() -> str: diff --git a/app/core/prompt_templates.py b/app/core/prompt_templates.py index 9785636..5f8d9fa 100644 --- a/app/core/prompt_templates.py +++ b/app/core/prompt_templates.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np from app.models.request_models import Example, Example_eval -from app.core.config import UseCase, Technique, ModelFamily, get_model_family,USE_CASE_CONFIGS, LENDING_DATA_PROMPT +from app.core.config import UseCase, Technique, ModelFamily, get_model_family,USE_CASE_CONFIGS, LENDING_DATA_PROMPT, USE_CASE_CONFIGS_EVALS from app.core.data_loader import DataLoader from app.core.data_analyser import DataAnalyser from app.core.summary_formatter import SummaryFormatter @@ -73,6 +73,8 @@ ); """ + + DEFAULT_freeform_TEXT2SQL_PROMPT = """Requirements: - Each solution must be a working SQL query - Include explanations where needed @@ -92,6 +94,43 @@ - "question" - "solution""" +Default_freeform_lending_data_prompt = """ + You need to create profile data for the LendingClub company which specialises in lending various types of loans to urban customers. + + +You need to generate the data in the same order for the following fields (description of each field is followed after the colon): + +loan_amnt: The listed amount of the loan applied for by the borrower. If at some point in time, the credit department reduces the loan amount, then it will be reflected in this value. +term: The number of payments on the loan. Values are in months and can be either 36 months or 60 months. +int_rate: Interest Rate on the loan +installment: The monthly payment owed by the borrower if the loan originates. +grade: LC assigned loan grade (Possible values: A, B, C, D, E, F, G) +sub_grade: LC assigned loan subgrade (Possible sub-values: 1-5 i.e A5) +emp_title: The job title supplied by the Borrower when applying for the loan. +emp_length: Employment length in years. 
Possible values are between 0 and 10 where 0 means less than one year and 10 means ten or more years. +home_ownership: The home ownership status provided by the borrower during registration or obtained from the credit report. Our values are: RENT, OWN, MORTGAGE, OTHER +annual_inc: The self-reported annual income provided by the borrower during registration. +verification_status: Indicates if income was verified by LC, not verified, or if the income source was verified +issue_d: The month which the loan was funded +loan_status: Current status of the loan +purpose: A category provided by the borrower for the loan request. +title: The loan title provided by the borrower +dti: A ratio calculated using the borrower’s total monthly debt payments on the total debt obligations, excluding mortgage and the requested LC loan, divided by the borrower’s self-reported monthly income. +earliest_cr_line: The month the borrower's earliest reported credit line was opened +open_acc: The number of open credit lines in the borrower's credit file. +pub_rec: Number of derogatory public records +revol_bal: Total credit revolving balance +revol_util: Revolving line utilization rate, or the amount of credit the borrower is using relative to all available revolving credit. +total_acc: The total number of credit lines currently in the borrower's credit file +initial_list_status: The initial listing status of the loan. Possible values are – W, F +application_type: Indicates whether the loan is an individual application or a joint application with two co-borrowers +mort_acc: Number of mortgage accounts. +pub_rec_bankruptcies: Number of public record bankruptcies +address: The physical address of the person + +Ensure PII from examples such as addresses are not used in the generated data to minimize any privacy concerns. 
+""" + DEFAULT_TEXT2SQL_PROMPT = """Requirements: - Each solution must be a working SQL query - Include explanations where needed @@ -135,9 +174,10 @@ class PromptHandler: def format_examples(examples: List[Example]) -> str: """Format examples as JSON string""" return [ - {"question": example.question, "solution": example.solution} - for example in (examples) - ] + {"question": example.question, "solution": example.solution} + for example in (examples) + ] + @staticmethod def format_examples_eval(examples: List[Example_eval]) -> str: """Format examples as JSON string""" @@ -196,17 +236,7 @@ def get_freeform_default_custom_prompt(use_case:UseCase, custom_prompt): @staticmethod def get_default_custom_eval_prompt(use_case:UseCase, custom_prompt): if custom_prompt == None: - if use_case == UseCase.TEXT2SQL: - custom_prompt = DEFAULT_TEXT2SQL_EVAL_PROMPT - - return custom_prompt - elif use_case == UseCase.CODE_GENERATION: - custom_prompt = DEFAULT_CODE_GENERATION_EVAL_PROMPT - return custom_prompt - - elif use_case == UseCase.CUSTOM: - custom_prompt = " " - return custom_prompt + return USE_CASE_CONFIGS_EVALS[use_case].prompt else: return custom_prompt @staticmethod @@ -536,7 +566,13 @@ def get_freeform_eval_prompt(model_id: str, custom_prompt = Optional[str] ) -> str: custom_prompt_str = PromptHandler.get_default_custom_eval_prompt(use_case, custom_prompt) - examples_str = PromptHandler.get_default_eval_example(use_case, examples) + #examples_str = PromptHandler.get_default_eval_example(use_case, examples) + + if examples: + examples_str = PromptHandler.format_examples_eval(examples) + + elif examples == [] or examples == None: + examples_str = PromptHandler.format_examples_eval(USE_CASE_CONFIGS_EVALS[use_case].default_examples) base_prompt = """ You are a brilliant judge on evaluating a set of data with fields and corresponding values Follow the given instructions to understand the structure of given data and evaluate it based on parameters defined for you.""" @@ -1003,11 +1039,14 @@ def json_serializable(obj): examples_str = json.dumps(example_custom, indent=2) else: - if use_case == UseCase.CODE_GENERATION or use_case == UseCase.TEXT2SQL: - examples_str = json.dumps(USE_CASE_CONFIGS[use_case].default_examples) - else: - examples_str = None - custom_prompt_default = PromptHandler.get_freeform_default_custom_prompt(use_case, custom_prompt) + #if use_case == UseCase.CODE_GENERATION or use_case == UseCase.TEXT2SQL or use_case == UseCase.LENDING_DATA: + examples_str = json.dumps(USE_CASE_CONFIGS[use_case].default_examples) + + if custom_prompt is None: + custom_prompt_default = USE_CASE_CONFIGS[use_case].prompt + else: + custom_prompt_default = custom_prompt + #custom_prompt_default = PromptHandler.get_freeform_default_custom_prompt(use_case, custom_prompt) schema_str = PromptHandler.get_default_schema(use_case, schema) if use_case ==UseCase.TEXT2SQL: custom_prompt_str = f"""Using this database schema: diff --git a/app/main.py b/app/main.py index 02e9a0f..27cd98e 100644 --- a/app/main.py +++ b/app/main.py @@ -47,7 +47,7 @@ from app.services.export_results import Export_Service from app.core.prompt_templates import PromptBuilder, PromptHandler -from app.core.config import UseCase, USE_CASE_CONFIGS +from app.core.config import UseCase, USE_CASE_CONFIGS, USE_CASE_CONFIGS_EVALS from app.core.database import DatabaseManager from app.core.exceptions import APIError, InvalidModelError, ModelHandlerError from app.services.model_alignment import ModelAlignment @@ -469,10 +469,6 @@ def _flatten(d: dict, 
parent_key:str = "", sep:str=".") -> dict: description = "get json content") async def get_dataset_size(request: RelativePath): - - - - if not request.path: return JSONResponse(status_code=400, content={"status": "failed", "error": "path missing"}) @@ -948,7 +944,7 @@ async def customise_prompt(use_case: UseCase): async def customise_prompt(use_case: UseCase): """Allow users to customize prompt. Only part of the prompt which can be customized""" try: - return PromptHandler.get_freeform_default_custom_prompt(use_case, custom_prompt=None) + return USE_CASE_CONFIGS[use_case].prompt except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -956,7 +952,7 @@ async def customise_prompt(use_case: UseCase): async def customise_prompt(use_case: UseCase): """Allow users to customize prompt. Only part of the prompt which can be customized""" try: - return PromptHandler.get_default_custom_eval_prompt(use_case, custom_prompt=None) + return USE_CASE_CONFIGS_EVALS[use_case].prompt except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -1146,16 +1142,8 @@ async def delete_evaluation(file_name: str, file_path: Optional[str] = None): @app.get("/use-cases/{use_case}/topics") async def get_topics(use_case: UseCase): """Get available topics for a specific use case""" - uc_topics = {"code_generation": ["Algorithms", "Async Programming", - "Data Structures", "Database Operations", - "Python Basics", "Web Development"], - - "text2sql":["Aggregations", "Basic Queries", "Data Manipulation", - "Joins", "Subqueries", "Window Functions"], - "custom": [] - } - - topics = uc_topics[use_case] + + topics = USE_CASE_CONFIGS[use_case].topics return {"topics":topics} @@ -1168,50 +1156,12 @@ async def get_gen_examples(use_case: UseCase): @app.get("/{use_case}/eval_examples") async def get_eval_examples(use_case: UseCase): - if use_case == UseCase.CODE_GENERATION: - examples = [ - { - "score": 3, - "justification": """The code achieves 3 points by implementing core functionality correctly (1), - showing generally correct implementation with proper syntax (2), - and being suitable for professional use with good Python patterns and accurate functionality (3). - While it demonstrates competent development practices, it lacks the robust error handling - and type hints needed for point 4, and could benefit from better efficiency optimization and code organization.""" - }, - { - "score": 4, - "justification": """ - The code earns 4 points by implementing basic functionality (1), showing correct implementation (2), - being production-ready (3), and demonstrating high efficiency with Python best practices - including proper error handling, type hints, and clear documentation (4). - It exhibits experienced developer qualities with well-structured code and maintainable design, though - it lacks the comprehensive testing and security considerations needed for a perfect score.""" - } - ] - elif use_case == UseCase.TEXT2SQL: - - examples = [ { - "score": 3, - "justification": """The query earns 3 points by successfully retrieving basic data (1), - showing correct logical implementation (2), and being suitable for - professional use with accurate data retrieval and good SQL pattern understanding (3). - However, it lacks efficiency optimizations and consistent style conventions needed for - point 4, using basic JOINs without considering indexing or performance implications. 
- While functional, the query would benefit from better organization and efficiency improvements.""" - }, - - { - "score": 4, - "justification": """The query merits 4 points by retrieving basic data correctly (1), implementing proper - logic (2), being production-ready (3), and demonstrating high efficiency with proper - indexing considerations, well-structured JOINs, and consistent formatting (4). It - shows experienced developer qualities with appropriate commenting and performant SQL - features, though it lacks the comprehensive NULL handling and execution plan optimization needed for a - perfect score.""" - } - ] - elif use_case ==UseCase.CUSTOM: + + if use_case ==UseCase.CUSTOM: examples = [] + + else: + examples = USE_CASE_CONFIGS_EVALS[use_case].default_examples return {"examples": examples} diff --git a/app/models/request_models.py b/app/models/request_models.py index bf47e5d..ed884cd 100644 --- a/app/models/request_models.py +++ b/app/models/request_models.py @@ -2,12 +2,13 @@ import os from pydantic import BaseModel, Field, field_validator, ConfigDict from enum import Enum -from app.core.config import USE_CASE_CONFIGS +from app.core.config import USE_CASE_CONFIGS, UseCase -class UseCase(str, Enum): - CODE_GENERATION = "code_generation" - TEXT2SQL = "text2sql" - CUSTOM = "custom" + +# class UseCase(str, Enum): +# CODE_GENERATION = "code_generation" +# TEXT2SQL = "text2sql" +# CUSTOM = "custom" class Technique(str, Enum): SFT = "sft" diff --git a/app/run_job.py b/app/run_job.py index 6d15833..6c58dd7 100644 --- a/app/run_job.py +++ b/app/run_job.py @@ -34,7 +34,10 @@ from app.services.synthesis_service import SynthesisService import asyncio import nest_asyncio # Add this import +import json, pandas as pd, numpy as np, os +from app.agents.schema import GenerationPlan +plan = GenerationPlan.model_validate_json(os.environ.get("JOB_PARAMS")) # Enable nested event loop nest_asyncio.apply() diff --git a/pyproject.toml b/pyproject.toml index f051f28..9cb3c26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,4 @@ + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/uv.lock b/uv.lock index 73d1c05..b0375fb 100644 --- a/uv.lock +++ b/uv.lock @@ -2151,4 +2151,4 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/b7/4b3c7c7913a278d445cc6284e59b2e62fa25e72758f888b7a7a39eb8423f/yarl-1.18.3-cp313-cp313-win32.whl", hash = "sha256:61ee62ead9b68b9123ec24bc866cbef297dd266175d53296e2db5e7f797f902d", size = 310152 }, { url = "https://files.pythonhosted.org/packages/f5/d5/688db678e987c3e0fb17867970700b92603cadf36c56e5fb08f23e822a0c/yarl-1.18.3-cp313-cp313-win_amd64.whl", hash = "sha256:578e281c393af575879990861823ef19d66e2b1d0098414855dd367e234f5b3c", size = 315723 }, { url = "https://files.pythonhosted.org/packages/f5/4b/a06e0ec3d155924f77835ed2d167ebd3b211a7b0853da1cf8d8414d784ef/yarl-1.18.3-py3-none-any.whl", hash = "sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b", size = 45109 }, -] +] \ No newline at end of file
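
Note on the overall shape of the patch: per-use-case topics, default examples, and prompts now live in the USE_CASE_CONFIGS / USE_CASE_CONFIGS_EVALS registries in app/core/config.py, and the endpoints in app/main.py and the handlers in app/core/prompt_templates.py simply look them up instead of hard-coding per-use-case branches. Below is a minimal sketch of that lookup pattern, assuming the repo's app package is importable; the helper names resolve_topics and resolve_eval_prompt are illustrative and not part of the patch.

from typing import List, Optional

from app.core.config import USE_CASE_CONFIGS, USE_CASE_CONFIGS_EVALS, UseCase


def resolve_topics(use_case: UseCase) -> List[str]:
    # Topics are now a plain list on UseCaseMetadata, so the old hard-coded
    # per-use-case dict behind /use-cases/{use_case}/topics reduces to a lookup.
    return USE_CASE_CONFIGS[use_case].topics


def resolve_eval_prompt(use_case: UseCase, custom_prompt: Optional[str] = None) -> str:
    # Mirrors get_default_custom_eval_prompt: a caller-supplied prompt wins,
    # otherwise fall back to the registry default for the use case.
    if custom_prompt is not None:
        return custom_prompt
    return USE_CASE_CONFIGS_EVALS[use_case].prompt or ""


if __name__ == "__main__":
    print(resolve_topics(UseCase.LENDING_DATA))
    print(resolve_eval_prompt(UseCase.CREDIT_CARD_DATA)[:200])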