From 1e04e080012176db5dec88efbb31bbebeb32d65f Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 11:53:50 +0000 Subject: [PATCH 1/9] initial --- README.md | 6 +- {pandas_llm => examples}/example-chatbot.py | 2 +- {pandas_llm => examples}/example.py | 3 +- pandas_llm/requirements.txt | 21 --- pandas_llm/pyvenv.cfg => pyvenv.cfg | 0 requirements.txt | 21 --- setup.py | 2 +- src/__init__.py | 0 pandas_llm/__init__.py => src/pandas_llm.py | 188 +++++++------------- src/requirements.txt | 21 +++ src/sandbox.py | 53 ++++++ update_requirements.py | 13 ++ 12 files changed, 155 insertions(+), 175 deletions(-) rename {pandas_llm => examples}/example-chatbot.py (98%) rename {pandas_llm => examples}/example.py (95%) delete mode 100644 pandas_llm/requirements.txt rename pandas_llm/pyvenv.cfg => pyvenv.cfg (100%) delete mode 100644 requirements.txt create mode 100644 src/__init__.py rename pandas_llm/__init__.py => src/pandas_llm.py (65%) create mode 100644 src/requirements.txt create mode 100644 src/sandbox.py create mode 100644 update_requirements.py diff --git a/README.md b/README.md index b29e89d..725f971 100644 --- a/README.md +++ b/README.md @@ -37,12 +37,12 @@ Here's a quick [example](https://github.com/DashyDashOrg/pandas-llm/blob/main/pa ```python import os import pandas as pd -from pandas_llm import PandasLLM +from src import PandasLLM # Data # Please note that these names, ages, and donations are randomly generated # and do not correspond to real individuals or their donations. -data = [('John Doe', 25, 50), +data = [('John Doe', 25, 50), ('Jane Smith', 38, 70), ('Alex Johnson', 45, 80), ('Jessica Brown', 60, 40), @@ -54,7 +54,7 @@ data = [('John Doe', 25, 50), ('Olivia Jackson', 29, 55)] df = pd.DataFrame(data, columns=['name', 'age', 'donation']) -conv_df = PandasLLM(data=df, llm_api_key = os.environ.get("OPENAI_API_KEY")) +conv_df = PandasLLM(data=df, llm_api_key=os.environ.get("OPENAI_API_KEY")) result = conv_df.prompt("What is the average donation of people older than 40 who donated more than $50?") code = conv_df.code_block diff --git a/pandas_llm/example-chatbot.py b/examples/example-chatbot.py similarity index 98% rename from pandas_llm/example-chatbot.py rename to examples/example-chatbot.py index a5f9767..e07628b 100644 --- a/pandas_llm/example-chatbot.py +++ b/examples/example-chatbot.py @@ -5,7 +5,7 @@ from pathlib import Path sys.path.append(str(Path(__file__).resolve().parent.parent)) -from pandas_llm import PandasLLM +from src import PandasLLM # Data # Please note that these names, ages, and donations are randomly generated and do not correspond to real individuals or their donations. diff --git a/pandas_llm/example.py b/examples/example.py similarity index 95% rename from pandas_llm/example.py rename to examples/example.py index 4585f98..ea8a725 100644 --- a/pandas_llm/example.py +++ b/examples/example.py @@ -1,10 +1,11 @@ +# examples/example.py import os import pandas as pd import sys from pathlib import Path sys.path.append(str(Path(__file__).resolve().parent.parent)) -from pandas_llm import PandasLLM +from src.pandas_llm import PandasLLM # Data # Please note that these names, ages, and donations are randomly generated diff --git a/pandas_llm/requirements.txt b/pandas_llm/requirements.txt deleted file mode 100644 index cdaed3e..0000000 --- a/pandas_llm/requirements.txt +++ /dev/null @@ -1,21 +0,0 @@ -aiohttp==3.8.4 -aiosignal==1.3.1 -async-timeout==4.0.2 -attrs==23.1.0 -certifi==2023.5.7 -charset-normalizer==3.1.0 -frozenlist==1.3.3 -idna==3.4 -multidict==6.0.4 -numpy==1.24.3 -openai==0.27.6 -pandas==2.0.1 -python-dateutil==2.8.2 -pytz==2023.3 -requests==2.30.0 -RestrictedPython==6.0 -six==1.16.0 -tqdm==4.65.0 -tzdata==2023.3 -urllib3==2.0.2 -yarl==1.9.2 diff --git a/pandas_llm/pyvenv.cfg b/pyvenv.cfg similarity index 100% rename from pandas_llm/pyvenv.cfg rename to pyvenv.cfg diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index cdaed3e..0000000 --- a/requirements.txt +++ /dev/null @@ -1,21 +0,0 @@ -aiohttp==3.8.4 -aiosignal==1.3.1 -async-timeout==4.0.2 -attrs==23.1.0 -certifi==2023.5.7 -charset-normalizer==3.1.0 -frozenlist==1.3.3 -idna==3.4 -multidict==6.0.4 -numpy==1.24.3 -openai==0.27.6 -pandas==2.0.1 -python-dateutil==2.8.2 -pytz==2023.3 -requests==2.30.0 -RestrictedPython==6.0 -six==1.16.0 -tqdm==4.65.0 -tzdata==2023.3 -urllib3==2.0.2 -yarl==1.9.2 diff --git a/setup.py b/setup.py index 5515c65..b8cb8b6 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ long_description = fh.read() setup( - name='pandas_llm', # should match the package folder + name='src', # should match the package folder version='0.0.6', # important for updates license='MIT', # should match your chosen license description='Conversational Pandas Dataframes', diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pandas_llm/__init__.py b/src/pandas_llm.py similarity index 65% rename from pandas_llm/__init__.py rename to src/pandas_llm.py index ac54fb2..189c5d3 100644 --- a/pandas_llm/__init__.py +++ b/src/pandas_llm.py @@ -1,104 +1,44 @@ -import pandas as pd +# src/pandas_llm.py import datetime import numpy as np -import openai +from openai import OpenAI import os import re import json - -# sandbox.py -from RestrictedPython import compile_restricted -from RestrictedPython.Guards import safe_builtins,guarded_iter_unpack_sequence -from RestrictedPython.Eval import default_guarded_getattr, default_guarded_getitem, default_guarded_getiter import pandas as pd +from dotenv import load_dotenv +load_dotenv() -class Sandbox: - def __init__(self): - self._allowed_imports = {} - - def allow_import(self, module_name): - try: - module = __import__(module_name) - self._allowed_imports[module_name] = module - except ImportError: - pass - - def execute(self, code, local_vars = {}): - allowed_builtins = safe_builtins - # Add __builtins__, __import__, and allowed imports to the globals - restricted_globals = {"__builtins__": allowed_builtins} - restricted_globals.update(self._allowed_imports) - - builtin_mappings = { - "__import__": __import__, - "_getattr_": default_guarded_getattr, - "_getitem_": default_guarded_getitem, - "_getiter_": default_guarded_getiter, - "_iter_unpack_sequence_": guarded_iter_unpack_sequence, - "list": list, - "set": set, - "pd": pd, - } - - series_methods = [ - "sum", "mean", "any", "argmax", "argmin", "count", "cumsum", "cumprod", "diff", - "dropna", "fillna", "head", "idxmax", "idxmin", "last", "max", "min", "notna", - "prod", "quantile", "rename", "round", "tail", "to_frame", "to_list", "to_numpy", - "to_string","unique", "sort_index", "sort_values", "aggregate" - ] - - - builtin_mappings.update({method: getattr(pd.Series, method) for method in series_methods}) - - restricted_globals["__builtins__"].update(builtin_mappings) - - byte_code = compile_restricted(source=code, filename='', mode='exec') - - # Execute the restricted code - exec(byte_code, restricted_globals, local_vars) - - return local_vars +from src.sandbox import Sandbox +# initialise the openai client +client = OpenAI() class PandasLLM(pd.DataFrame): """ PandasLLM is a subclass of the Pandas DataFrame class. It is designed to provide a - wrapper around the OpenAI API. + wrapper around the OpenAI API. """ - code_blocks = [r'```python(.*?)```',r'```(.*?)```'] - - llm_default_model = "gpt-3.5-turbo" - llm_default_temperature = 0.2 - llm_engine = "openai" - llm_default_params = { "model": llm_default_model, - "temperature": llm_default_temperature} - llm_api_key = None - - prompt_override = False - custom_prompt = "" - data_privacy = True - path = None - verbose = False - code_block = "" - force_sandbox = False - def __init__(self, - data, - llm_engine:str = "openai", llm_params=llm_default_params, - prompt_override:bool = False, - custom_prompt:str = "", - path:str = None, - verbose:bool = False, - data_privacy:bool = True, - llm_api_key:str = None, - force_sandbox:bool = False, + def __init__(self, + data, + llm_engine: str = "openai", + llm_kwargs={"model": "gpt-4o-mini", + "temperature": 0.2}, + prompt_override: bool = False, + custom_prompt: str = "", + path: str = None, + verbose: bool = False, + data_privacy: bool = True, + llm_api_key: str = None, + force_sandbox: bool = False, *args, **kwargs): """ This is the constructor for the PandasLLM class. It takes in the following arguments: data: The data to be used. It can be a Pandas DataFrame, a list of lists, a list of tuples, a list of dictionaries, a dictionary, a string, or a list. llm_engine: The name of the OpenAI engine to use. - llm_params: A dictionary of parameters to be used with the OpenAI API. + llm_kwargs: A dictionary of parameters to be used with the OpenAI API. prompt_override: A boolean that determines whether or not the prompt is overridden. custom_prompt: A string that overrides the prompt. path: The path to the file to be used. @@ -109,13 +49,13 @@ def __init__(self, The constructor also calls the parent class's constructor. - + Args: data (pandas dataframe, mandatory): dataset to query. Defaults to None. llm_engine (str, optional): LLM engine, currently only OpenAI is supported. Defaults to "openai". - llm_params (dict, optional): LLM engine parameters. Defaults to model=gpt-3.5-turbo and temperature=0.2". + llm_kwargs (dict, optional): LLM engine parameters. Defaults to model=gpt-3.5-turbo and temperature=0.2". prompt_override (bool, optional): if True, the custom prompt is mandatory and it will became the main prompt. Defaults to False. - custom_prompt (str, optional): if prompt_override is False, the custom prompt will be added to the default pandas_llm prompt. Defaults to "". + custom_prompt (str, optional): if prompt_override is False, the custom prompt will be added to the default src prompt. Defaults to "". path (str, optional): the path where the files containing debug data will be save. Defaults to None. verbose (bool, optional): if True debugging info will be printed. Defaults to False. data_privacy (bool, optional): if True, the function will not send the data content to OpenAI. Defaults to True. @@ -123,18 +63,17 @@ def __init__(self, force_sandbox (bool, optional): if False and the sandbox fails, it will retry using eval (less safe). Defaults to False. """ - super().__init__(data, *args, **kwargs) - - self.llm_params = llm_params or {} + + self.llm_kwargs = llm_kwargs or {} # Set up OpenAI API key from the environment or the config self.llm_api_key = llm_api_key or os.environ.get("OPENAI_API_KEY") self.llm_engine = llm_engine - self.llm_params = llm_params or {} - self.model = self.llm_params.get("model", self.llm_default_model) - self.temperature = self.llm_params.get("temperature", self.llm_default_temperature) + self.llm_kwargs = llm_kwargs or {} + self.model = self.llm_kwargs.get("model") + self.temperature = self.llm_kwargs.get("temperature") self.prompt_override = prompt_override self.custom_prompt = custom_prompt @@ -143,6 +82,7 @@ def __init__(self, self.path = path self.verbose = verbose self.force_sandbox = force_sandbox + self.code_blocks = [r'```python(.*?)```', r'```(.*?)```'] def _buildPromptForRole(self): prompt_role = f""" @@ -186,7 +126,6 @@ def _buildPromptForProblemSolving(self, request): """ if not self.custom_prompt is None and len(self.custom_prompt) > 0: - prompt_problem += f""" Also: {self.custom_prompt} @@ -197,18 +136,18 @@ def _buildPromptForProblemSolving(self, request): def _extractPythonCode(self, text: str, regexp: str) -> str: # Define the regular expression pattern for the Python code block pattern = regexp - + # Search for the pattern in the input text match = re.search(pattern, text, re.DOTALL) - + # If a match is found, return the extracted code (without the markers) if match: return match.group(1).strip() - + # If no match is found, return an empty string return "" - def _print(self, *args, **kwargs): + def _print(self, *args, **kwargs): if self.verbose: print(*args, **kwargs) @@ -233,11 +172,10 @@ def _print(self, *args, **kwargs): # return str(variable) # except Exception as e: # return str(variable) - - def _save(self,name,value): + def _save(self, name, value): if self.path is None or self.path == "": - return + return try: with open(f"{self.path}/{name}", 'w') as file: file.write(value) @@ -245,7 +183,7 @@ def _save(self,name,value): self._print(f"error {e}") return - def _execInSandbox(self, df, generated_code:str): + def _execInSandbox(self, df, generated_code: str): # Create a Sandbox instance and allow pandas to be imported sandbox = Sandbox() @@ -264,9 +202,9 @@ def _execInSandbox(self, df, generated_code:str): # Combine the initial code and the generated code full_code = initial_code + "\n" + generated_code - self._save("temp/prompt_code.py",full_code) + self._save("temp/prompt_code.py", full_code) # Execute the combined code in the Sandbox - sandbox_result = sandbox.execute(full_code, {"df":df}) + sandbox_result = sandbox.execute(full_code, {"df": df}) # Get the result from the local_vars dictionary result = sandbox_result.get("result") @@ -281,25 +219,22 @@ def prompt(self, request: str): Returns: Any: contains the result or solution of the problem. Tipically the result data type is a dataframe, a Series or a float """ - - # Set up OpenAI API key - openai.api_key = self.llm_api_key - - messages=[ - {"role": "system", - "content": self._buildPromptForRole()}, - {"role": "user", - "content": self._buildPromptForProblemSolving(request) - } - ] + + + + self.llm_kwargs['messages'] = [ + {"role": "system", + "content": self._buildPromptForRole()}, + {"role": "user", + "content": self._buildPromptForProblemSolving(request) + } + ] response = None - for times in range(0,3): + for times in range(0, 3): try: - response = openai.ChatCompletion.create( - model=self.model, - temperature=self.temperature, - messages = messages + response = client.chat.completions.create( + **self.llm_kwargs ) break; except Exception as e: @@ -309,18 +244,18 @@ def prompt(self, request: str): if response is None: return "Please try later" - self._save("temp/prompt_cmd.json",json.dumps(messages, indent=4)) + self._save("temp/prompt_cmd.json", json.dumps(self.llm_kwargs['messages'], indent=4)) generated_code = response.choices[0].message.content if generated_code == "" or generated_code is None: self.code_block = "" return None - + self.code_block = generated_code - results=[] + results = [] for regexp in self.code_blocks: - cleaned_code = self._extractPythonCode(generated_code,regexp) + cleaned_code = self._extractPythonCode(generated_code, regexp) if cleaned_code == "" or cleaned_code is None: continue results.append(cleaned_code) @@ -331,7 +266,7 @@ def prompt(self, request: str): result = None for cleaned_code in results: - + try: result = self._execInSandbox(self, cleaned_code) except Exception as e: @@ -339,7 +274,8 @@ def prompt(self, request: str): if not self.force_sandbox: try: expression = re.sub(r"^\s*result\s*=", "", cleaned_code).strip() - result = eval(expression, {'df': self, 'pd': pd, 'np': np, 'datetime': datetime, 'result': result}) + result = eval(expression, + {'df': self, 'pd': pd, 'np': np, 'datetime': datetime, 'result': result}) except Exception as e: self._print(f"error {e}") pass @@ -350,10 +286,8 @@ def prompt(self, request: str): if self.data_privacy == True: # non formatted result return result - + # currently the privacy option is not needed. # in the future, we can choose to send data to LLM if privacy is set to false - return result - - \ No newline at end of file + return result \ No newline at end of file diff --git a/src/requirements.txt b/src/requirements.txt new file mode 100644 index 0000000..cc53efc --- /dev/null +++ b/src/requirements.txt @@ -0,0 +1,21 @@ +aiohttp +aiosignal +async-timeout +attrs +certifi +charset-normalizer +frozenlist +idna +multidict +numpy +openai +pandas +python-dateutil +pytz +requests +RestrictedPython +six +tqdm +tzdata +urllib3 +yarl diff --git a/src/sandbox.py b/src/sandbox.py new file mode 100644 index 0000000..993a646 --- /dev/null +++ b/src/sandbox.py @@ -0,0 +1,53 @@ +# src/sandbox.py +import pandas as pd +from RestrictedPython import compile_restricted +from RestrictedPython.Guards import safe_builtins,guarded_iter_unpack_sequence +from RestrictedPython.Eval import default_guarded_getattr, default_guarded_getitem, default_guarded_getiter + +class Sandbox: + def __init__(self): + self._allowed_imports = {} + + def allow_import(self, module_name): + try: + module = __import__(module_name) + self._allowed_imports[module_name] = module + except ImportError: + pass + + def execute(self, code, local_vars = {}): + allowed_builtins = safe_builtins + # Add __builtins__, __import__, and allowed imports to the globals + restricted_globals = {"__builtins__": allowed_builtins} + restricted_globals.update(self._allowed_imports) + + builtin_mappings = { + "__import__": __import__, + "_getattr_": default_guarded_getattr, + "_getitem_": default_guarded_getitem, + "_getiter_": default_guarded_getiter, + "_iter_unpack_sequence_": guarded_iter_unpack_sequence, + "list": list, + "set": set, + "pd": pd, + } + + series_methods = [ + "sum", "mean", "any", "argmax", "argmin", "count", "cumsum", "cumprod", "diff", + "dropna", "fillna", "head", "idxmax", "idxmin", "last", "max", "min", "notna", + "prod", "quantile", "rename", "round", "tail", "to_frame", "to_list", "to_numpy", + "to_string","unique", "sort_index", "sort_values", "aggregate" + ] + + + builtin_mappings.update({method: getattr(pd.Series, method) for method in series_methods}) + + restricted_globals["__builtins__"].update(builtin_mappings) + + byte_code = compile_restricted(source=code, filename='', mode='exec') + + # Execute the restricted code + exec(byte_code, restricted_globals, local_vars) + + return local_vars + diff --git a/update_requirements.py b/update_requirements.py new file mode 100644 index 0000000..5a352e6 --- /dev/null +++ b/update_requirements.py @@ -0,0 +1,13 @@ +import subprocess + +def install_or_update_packages(requirements_file: str): + with open(requirements_file, 'r') as file: + packages = file.readlines() + + for package in packages: + package_name = package.strip() + if package_name: + subprocess.run(['pip', 'install', '--upgrade', package_name]) + +# Example usage +install_or_update_packages('src/requirements.txt') \ No newline at end of file From 7f8196ee141a1afc7681417b0db1ca2571f39f02 Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 12:05:21 +0000 Subject: [PATCH 2/9] initial --- examples/EC3_equations_short.csv | 38 +++ examples/__init__.py | 32 +++ examples/example-chatbot.py | 80 ------- examples/example.py | 44 ++-- pyvenv.cfg | 3 - src/requirements.txt => requirements.txt | 0 setup.cfg | 2 - setup.py | 51 +--- src/pandas_llm.py | 293 ----------------------- src/pandas_query.py | 120 ++++++++++ src/sandbox.py | 53 ---- update_requirements.py | 13 - 12 files changed, 216 insertions(+), 513 deletions(-) create mode 100644 examples/EC3_equations_short.csv create mode 100644 examples/__init__.py delete mode 100644 examples/example-chatbot.py delete mode 100644 pyvenv.cfg rename src/requirements.txt => requirements.txt (100%) delete mode 100644 setup.cfg delete mode 100644 src/pandas_llm.py create mode 100644 src/pandas_query.py delete mode 100644 src/sandbox.py delete mode 100644 update_requirements.py diff --git a/examples/EC3_equations_short.csv b/examples/EC3_equations_short.csv new file mode 100644 index 0000000..f3b455a --- /dev/null +++ b/examples/EC3_equations_short.csv @@ -0,0 +1,38 @@ +,section_number,section_title,equation,variables +0,1,Section 1,"{""expression"": ""x-x \\\\text{ axis along a member}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""2cfa4e1f-744e-429d-91f8-e2efcf0c03ac"", ""description"": ""The x-x axis is the longitudinal axis along the member."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""2cfa4e1f-744e-429d-91f8-e2efcf0c03ac"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""7428aa91-6096-4981-b09f-b18de708d848"", ""description"": ""x-x axis along a member"", ""name"": ""x"", ""parent_uuid"": ""2cfa4e1f-744e-429d-91f8-e2efcf0c03ac"", ""uuid"": ""7428aa91-6096-4981-b09f-b18de708d848"", ""type"": ""variable""}]" +1,1,Section 1,"{""expression"": ""y-y \\\\text{ axis of a cross-section}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""5203bba8-9473-420f-aa9f-0470634a4135"", ""description"": ""The y-y axis is the vertical axis of a cross-section."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""5203bba8-9473-420f-aa9f-0470634a4135"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""47906c6b-56e4-482a-b92f-0ce8e76339e0"", ""description"": ""y-y axis of a cross-section"", ""name"": ""y"", ""parent_uuid"": ""5203bba8-9473-420f-aa9f-0470634a4135"", ""uuid"": ""47906c6b-56e4-482a-b92f-0ce8e76339e0"", ""type"": ""variable""}]" +2,1,Section 1,"{""expression"": ""z-z \\\\text{ axis of a cross-section}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""3d57a809-46f3-4a1b-bec1-16520beab9f9"", ""description"": ""The z-z axis is the horizontal axis of a cross-section."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""3d57a809-46f3-4a1b-bec1-16520beab9f9"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""f104cba3-5400-4456-b37f-419d81a166cd"", ""description"": ""z-z axis of a cross-section"", ""name"": ""z"", ""parent_uuid"": ""3d57a809-46f3-4a1b-bec1-16520beab9f9"", ""uuid"": ""f104cba3-5400-4456-b37f-419d81a166cd"", ""type"": ""variable""}]" +3,1,Section 1,"{""expression"": ""u-u \\\\text{ major principal axis}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""3803066a-ce4e-4b8f-abc0-a86513893e35"", ""description"": ""The u-u axis is the major principal axis, not coinciding with the y-y axis."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""3803066a-ce4e-4b8f-abc0-a86513893e35"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""8ee7fb0b-4f69-4150-a907-41da6142bac4"", ""description"": ""major principal axis"", ""name"": ""u"", ""parent_uuid"": ""3803066a-ce4e-4b8f-abc0-a86513893e35"", ""uuid"": ""8ee7fb0b-4f69-4150-a907-41da6142bac4"", ""type"": ""variable""}]" +4,1,Section 1,"{""expression"": ""v-v \\\\text{ minor principal axis}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""37a4e304-46f7-4189-97a1-c087dd6a0a7b"", ""description"": ""The v-v axis is the minor principal axis, not coinciding with the z-z axis."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""37a4e304-46f7-4189-97a1-c087dd6a0a7b"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""9b2f1377-a981-4ffd-adb0-ff36b72c4fc1"", ""description"": ""minor principal axis"", ""name"": ""v"", ""parent_uuid"": ""37a4e304-46f7-4189-97a1-c087dd6a0a7b"", ""uuid"": ""9b2f1377-a981-4ffd-adb0-ff36b72c4fc1"", ""type"": ""variable""}]" +5,1,Section 1,"{""expression"": ""b \\\\text{ width of a cross section}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""de7f9e9d-f4a2-4078-9829-fcb8a9986919"", ""description"": ""The width of the cross section."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""de7f9e9d-f4a2-4078-9829-fcb8a9986919"", ""type"": ""equation"", ""variables"": []}",[] +6,1,Section 1,"{""expression"": ""h \\\\text{ depth of a cross section}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""90374ae4-1624-4ae4-b414-74575ba976d3"", ""description"": ""The depth of the cross section."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""90374ae4-1624-4ae4-b414-74575ba976d3"", ""type"": ""equation"", ""variables"": []}",[] +7,1,Section 1,"{""expression"": ""d \\\\text{ depth of straight portion of a web}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""a19f5dbb-8d6a-49f4-af09-03f81483d271"", ""description"": ""The depth of the straight portion of a web."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""a19f5dbb-8d6a-49f4-af09-03f81483d271"", ""type"": ""equation"", ""variables"": []}",[] +8,1,Section 1,"{""expression"": ""t_{w} \\\\text{ web thickness}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""3f3a1d96-fe34-4e48-a576-1196eb095a02"", ""description"": ""The thickness of the web."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""3f3a1d96-fe34-4e48-a576-1196eb095a02"", ""type"": ""equation"", ""variables"": []}",[] +9,1,Section 1,"{""expression"": ""t_{f} \\\\text{ flange thickness}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""894aab11-f36d-481e-b286-dd506aaa6fa2"", ""description"": ""The thickness of the flange."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""894aab11-f36d-481e-b286-dd506aaa6fa2"", ""type"": ""equation"", ""variables"": []}",[] +10,1,Section 1,"{""expression"": ""r \\\\text{ radius of root fillet}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""17e80b15-0de0-480d-9d81-4c4d7f38e563"", ""description"": ""The radius of the root fillet."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""17e80b15-0de0-480d-9d81-4c4d7f38e563"", ""type"": ""equation"", ""variables"": []}",[] +11,1,Section 1,"{""expression"": ""r_{1} \\\\text{ radius of root fillet}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""9cb87802-20bd-4cba-b27b-e7a04104f2de"", ""description"": ""The radius of the root fillet (alternative notation)."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""9cb87802-20bd-4cba-b27b-e7a04104f2de"", ""type"": ""equation"", ""variables"": []}",[] +12,1,Section 1,"{""expression"": ""r_{2} \\\\text{ toe radius}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""5eee8bdf-cd2b-4d76-9be7-46b43f110930"", ""description"": ""The toe radius."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""5eee8bdf-cd2b-4d76-9be7-46b43f110930"", ""type"": ""equation"", ""variables"": []}",[] +13,1,Section 1,"{""expression"": ""t \\\\text{ thickness}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c18f50a2-522a-49f1-9ad4-f2f64fb99df3"", ""description"": ""The thickness of the member."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""c18f50a2-522a-49f1-9ad4-f2f64fb99df3"", ""type"": ""equation"", ""variables"": []}",[] +14,1.2.1,General reference standards,"{""expression"": ""EN 1090"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""111950e8-8f12-452e-b56c-2d5297339ed8"", ""description"": ""Execution of steel structures - Technical requirements"", ""parent_uuid"": ""89c82902-67a1-4ab5-ac79-7900536281c5"", ""uuid"": ""111950e8-8f12-452e-b56c-2d5297339ed8"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] +15,1.2.1,General reference standards,"{""expression"": ""EN ISO 12944"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""86006784-ccee-49e3-a192-9aef09e923f3"", ""description"": ""Paints and varnishes - Corrosion protection of steel structures by protective paint systems"", ""parent_uuid"": ""89c82902-67a1-4ab5-ac79-7900536281c5"", ""uuid"": ""86006784-ccee-49e3-a192-9aef09e923f3"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] +16,1.2.1,General reference standards,"{""expression"": ""AC 2 EN ISO 1461 $4 AC_{2}$"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""504404b4-30e5-4189-aa8d-192411838536"", ""description"": ""Hot dip galvanized coatings on fabricated iron and steel articles - specifications and test methods"", ""parent_uuid"": ""89c82902-67a1-4ab5-ac79-7900536281c5"", ""uuid"": ""504404b4-30e5-4189-aa8d-192411838536"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] +17,1.5.6,buckling length,"{""expression"": ""L_{b} = k_{l} \\\\cdot L_{0}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c4d1dc5c-e91d-4ba2-803a-f4faea4c8f9b"", ""description"": ""The buckling length (L_{b}) is calculated as the product of the effective length factor (k_{l}) and the unbraced length (L_{0})."", ""parent_uuid"": ""07cbadb6-84ca-4684-86b3-84566c7145f4"", ""uuid"": ""c4d1dc5c-e91d-4ba2-803a-f4faea4c8f9b"", ""type"": ""equation""}","[{""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""fbac13d8-4b4f-4e79-af8f-6f30334c9a76"", ""description"": ""Effective length factor"", ""name"": ""k_{l}"", ""parent_uuid"": ""c4d1dc5c-e91d-4ba2-803a-f4faea4c8f9b"", ""uuid"": ""fbac13d8-4b4f-4e79-af8f-6f30334c9a76"", ""type"": ""variable""}, {""unit"": ""m"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""e253a218-e3bc-4915-b883-4d7443c75559"", ""description"": ""Unbraced length of the member"", ""name"": ""L_{0}"", ""parent_uuid"": ""c4d1dc5c-e91d-4ba2-803a-f4faea4c8f9b"", ""uuid"": ""e253a218-e3bc-4915-b883-4d7443c75559"", ""type"": ""variable""}]" +18,2,Section 2,"{""expression"": ""P_{k}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""d0922511-5b4c-49ed-8646-0a19206fcc22"", ""description"": ""Nominal value of the effect of prestressing imposed during erection"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""d0922511-5b4c-49ed-8646-0a19206fcc22"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""be1d4b88-50a1-4237-bb06-8f4d60a31678"", ""description"": ""Nominal value of the effect of prestressing imposed during erection"", ""name"": ""P_{k}"", ""parent_uuid"": ""d0922511-5b4c-49ed-8646-0a19206fcc22"", ""uuid"": ""be1d4b88-50a1-4237-bb06-8f4d60a31678"", ""type"": ""variable""}]" +19,2,Section 2,"{""expression"": ""G_{k}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""f199f388-8e04-4297-848d-7c564dd70795"", ""description"": ""Nominal value of the effect of permanent actions"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""f199f388-8e04-4297-848d-7c564dd70795"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""6573bd15-5a65-40c7-a8dd-b930bd57e3eb"", ""description"": ""Nominal value of the effect of permanent actions"", ""name"": ""G_{k}"", ""parent_uuid"": ""f199f388-8e04-4297-848d-7c564dd70795"", ""uuid"": ""6573bd15-5a65-40c7-a8dd-b930bd57e3eb"", ""type"": ""variable""}]" +20,2,Section 2,"{""expression"": ""X_{k}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""16ceccea-a113-417f-90be-40059d75a019"", ""description"": ""Characteristic values of material property"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""16ceccea-a113-417f-90be-40059d75a019"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""2b0a31f9-8ed5-402c-b79f-d6d139250bab"", ""description"": ""Characteristic values of material property"", ""name"": ""X_{k}"", ""parent_uuid"": ""16ceccea-a113-417f-90be-40059d75a019"", ""uuid"": ""2b0a31f9-8ed5-402c-b79f-d6d139250bab"", ""type"": ""variable""}]" +21,2,Section 2,"{""expression"": ""X_{n}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""2203cd28-9f21-465c-a83d-06a596ac4c36"", ""description"": ""Nominal values of material property"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""2203cd28-9f21-465c-a83d-06a596ac4c36"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""843548c5-6333-464b-9c4d-0a4207cb9568"", ""description"": ""Nominal values of material property"", ""name"": ""X_{n}"", ""parent_uuid"": ""2203cd28-9f21-465c-a83d-06a596ac4c36"", ""uuid"": ""843548c5-6333-464b-9c4d-0a4207cb9568"", ""type"": ""variable""}]" +22,2,Section 2,"{""expression"": ""R_{d}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""7c64c026-c967-4355-81de-3d4fbff8c99b"", ""description"": ""Design value of resistance"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""7c64c026-c967-4355-81de-3d4fbff8c99b"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""d2d0f544-2f19-4529-8e8b-cf7e7d529908"", ""description"": ""Design value of resistance"", ""name"": ""R_{d}"", ""parent_uuid"": ""7c64c026-c967-4355-81de-3d4fbff8c99b"", ""uuid"": ""d2d0f544-2f19-4529-8e8b-cf7e7d529908"", ""type"": ""variable""}]" +23,2,Section 2,"{""expression"": ""R_{k}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""de25b03d-888a-4341-b14f-80b72a0a648d"", ""description"": ""Characteristic value of resistance"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""de25b03d-888a-4341-b14f-80b72a0a648d"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""35eec3c1-56f3-47a9-9e3f-997136e039c6"", ""description"": ""Characteristic value of resistance"", ""name"": ""R_{k}"", ""parent_uuid"": ""de25b03d-888a-4341-b14f-80b72a0a648d"", ""uuid"": ""35eec3c1-56f3-47a9-9e3f-997136e039c6"", ""type"": ""variable""}]" +24,2,Section 2,"{""expression"": ""\\\\gamma_{M}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""21d9bb0f-2ca7-45b0-80ef-89e8eb7712cf"", ""description"": ""General partial factor"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""21d9bb0f-2ca7-45b0-80ef-89e8eb7712cf"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""287695b0-64ee-4b9c-babd-3814879c2870"", ""description"": ""General partial factor"", ""name"": ""\\\\gamma_{M}"", ""parent_uuid"": ""21d9bb0f-2ca7-45b0-80ef-89e8eb7712cf"", ""uuid"": ""287695b0-64ee-4b9c-babd-3814879c2870"", ""type"": ""variable""}]" +25,2,Section 2,"{""expression"": ""\\\\gamma_{Mi}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""bf7621c4-a0d8-4fd4-a4cf-08872020ca9d"", ""description"": ""Particular partial factor"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""bf7621c4-a0d8-4fd4-a4cf-08872020ca9d"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""cb625bc8-3efe-40b5-b743-7fc04540c1c0"", ""description"": ""Particular partial factor"", ""name"": ""\\\\gamma_{Mi}"", ""parent_uuid"": ""bf7621c4-a0d8-4fd4-a4cf-08872020ca9d"", ""uuid"": ""cb625bc8-3efe-40b5-b743-7fc04540c1c0"", ""type"": ""variable""}]" +26,2,Section 2,"{""expression"": ""\\\\gamma_{Mf}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""8ebcc6f2-c90f-45fb-bc01-24d793589b48"", ""description"": ""Partial factor for fatigue"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""8ebcc6f2-c90f-45fb-bc01-24d793589b48"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""f894e44e-3f98-4e1c-aee8-c3f82ce4b8ec"", ""description"": ""Partial factor for fatigue"", ""name"": ""\\\\gamma_{Mf}"", ""parent_uuid"": ""8ebcc6f2-c90f-45fb-bc01-24d793589b48"", ""uuid"": ""f894e44e-3f98-4e1c-aee8-c3f82ce4b8ec"", ""type"": ""variable""}]" +27,2,Section 2,"{""expression"": ""\\\\eta"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""87712994-dabb-4be8-b639-27d6a3e36d37"", ""description"": ""Conversion factor"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""87712994-dabb-4be8-b639-27d6a3e36d37"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""30489fe4-f5d2-4949-b46b-df0e6070b6dd"", ""description"": ""Conversion factor"", ""name"": ""\\\\eta"", ""parent_uuid"": ""87712994-dabb-4be8-b639-27d6a3e36d37"", ""uuid"": ""30489fe4-f5d2-4949-b46b-df0e6070b6dd"", ""type"": ""variable""}]" +28,2,Section 2,"{""expression"": ""a_{d}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""9a818d35-7495-4078-b886-60e1f823f5fc"", ""description"": ""Design value of geometrical data"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""9a818d35-7495-4078-b886-60e1f823f5fc"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c68c19e4-bfba-4c83-a85a-357eeaf45568"", ""description"": ""Design value of geometrical data"", ""name"": ""a_{d}"", ""parent_uuid"": ""9a818d35-7495-4078-b886-60e1f823f5fc"", ""uuid"": ""c68c19e4-bfba-4c83-a85a-357eeaf45568"", ""type"": ""variable""}]" +29,2.1.3.1,General,"{""expression"": ""AC_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""description"": ""General requirements for the durability of steel structures based on the type of action and design working life."", ""parent_uuid"": ""972ae989-5a40-4498-9f05-c4c433597823"", ""uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""d2a80956-68f7-4b58-a063-3d7449412c37"", ""description"": ""The type of action affecting durability."", ""name"": ""Type of action"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""d2a80956-68f7-4b58-a063-3d7449412c37"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""b7c277d6-9c44-4515-8060-f3f112026836"", ""description"": ""The intended lifespan of the structure as per EN 1990."", ""name"": ""Design working life"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""b7c277d6-9c44-4515-8060-f3f112026836"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""062734be-57a0-4d81-a8e0-7badc7a9c1c6"", ""description"": ""Methods to protect against corrosion including surface protection, weathering steel, and stainless steel."", ""name"": ""Corrosion protection methods"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""062734be-57a0-4d81-a8e0-7badc7a9c1c6"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""54e65188-6533-4932-88d9-c99b7ce42e5b"", ""description"": ""The required fatigue life of the structure as per EN 1993-1-9."", ""name"": ""Fatigue life"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""54e65188-6533-4932-88d9-c99b7ce42e5b"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""7656cd6c-c5b0-48fc-a93a-9d434dbe73b4"", ""description"": ""Design considerations for wear resistance."", ""name"": ""Wearing design"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""7656cd6c-c5b0-48fc-a93a-9d434dbe73b4"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""fea415d0-afd3-4bd1-8712-fd582c9b5af9"", ""description"": ""Design considerations for accidental actions as per EN 1991-1-7."", ""name"": ""Accidental actions"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""fea415d0-afd3-4bd1-8712-fd582c9b5af9"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""a11a3a22-0751-49bd-84ba-464dfaf8bc8b"", ""description"": ""Requirements for the inspection and maintenance of the structure."", ""name"": ""Inspection and maintenance"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""a11a3a22-0751-49bd-84ba-464dfaf8bc8b"", ""type"": ""variable""}]" +30,2.1.3.2,Design working life for buildings,"{""expression"": ""A C_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""b5f95f76-4d74-4e72-8175-99c08005a8c0"", ""description"": ""The design working life shall be taken as the period for which a building structure is expected to be used for its intended purpose."", ""parent_uuid"": ""a6159013-feed-4450-97b5-bdc376f1d1a2"", ""uuid"": ""b5f95f76-4d74-4e72-8175-99c08005a8c0"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""years"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""46b599e7-3a18-4e6b-a3e7-469368db0bb3"", ""description"": ""The expected period for which a building structure is intended to be used."", ""name"": ""Design Working Life"", ""parent_uuid"": ""b5f95f76-4d74-4e72-8175-99c08005a8c0"", ""uuid"": ""46b599e7-3a18-4e6b-a3e7-469368db0bb3"", ""type"": ""variable""}]" +31,2.1.3.3,Durability for buildings,"{""expression"": ""A_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""bcb49066-3c30-4e86-8de5-2f2e50d71d4a"", ""description"": ""Durability requirement for buildings and their components to be designed for environmental actions and fatigue or protected from them."", ""parent_uuid"": ""e0dbf84a-600c-47f2-995e-2c23fb3ad54c"", ""uuid"": ""bcb49066-3c30-4e86-8de5-2f2e50d71d4a"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] +32,2.1.3.3,Durability for buildings,"{""expression"": ""A C_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""829efb1e-e32f-411b-9fa0-7412b26aed13"", ""description"": ""Consideration of material deterioration, corrosion, or fatigue through appropriate material choice and structural redundancy."", ""parent_uuid"": ""e0dbf84a-600c-47f2-995e-2c23fb3ad54c"", ""uuid"": ""829efb1e-e32f-411b-9fa0-7412b26aed13"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] +33,2.2,Principles of limit state design,"{""expression"": ""\\\\sqrt{AC_{2}}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""a7a226c8-d252-431b-a970-f31e7871a98f"", ""description"": ""The resistance of cross-sections and members specified in Eurocode 3 for the ultimate limit states."", ""parent_uuid"": ""540541e0-7e44-418d-842c-c34f0f11701c"", ""uuid"": ""a7a226c8-d252-431b-a970-f31e7871a98f"", ""type"": ""equation""}","[{""unit"": ""N"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""faa42d2e-f5b6-41e4-814f-87ba2e0efdc7"", ""description"": ""A coefficient related to the material properties and design conditions"", ""name"": ""AC_{2}"", ""parent_uuid"": ""a7a226c8-d252-431b-a970-f31e7871a98f"", ""uuid"": ""faa42d2e-f5b6-41e4-814f-87ba2e0efdc7"", ""type"": ""variable""}]" +34,2.3.1,Actions and environmental influences,"{""expression"": ""P_k + \\\\sqrt{A C_{2}} G_k"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""description"": ""The total action considering permanent actions and imposed deformations during erection."", ""parent_uuid"": ""e7314dfa-02f1-45ef-afba-035d411f2b81"", ""uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""type"": ""equation""}","[{""unit"": ""N"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""851d8496-9194-4a28-a1f0-61e4866409a8"", ""description"": ""Nominal value of imposed deformations as permanent actions"", ""name"": ""P_k"", ""parent_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""uuid"": ""851d8496-9194-4a28-a1f0-61e4866409a8"", ""type"": ""variable""}, {""unit"": ""N"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""af35d11d-a3ea-4eda-b869-5d8d889bf9d4"", ""description"": ""Other permanent actions"", ""name"": ""G_k"", ""parent_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""uuid"": ""af35d11d-a3ea-4eda-b869-5d8d889bf9d4"", ""type"": ""variable""}, {""unit"": ""m^2"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""bab1002c-f685-472e-8f50-cd69ab7f3f03"", ""description"": ""Area factor related to the structure"", ""name"": ""A"", ""parent_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""uuid"": ""bab1002c-f685-472e-8f50-cd69ab7f3f03"", ""type"": ""variable""}, {""unit"": ""dimensionless"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""35395598-3a71-4043-9bfb-fe8455cc6dd2"", ""description"": ""Coefficient related to the structure"", ""name"": ""C_{2}"", ""parent_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""uuid"": ""35395598-3a71-4043-9bfb-fe8455cc6dd2"", ""type"": ""variable""}]" +35,2.4.1,Design values of material properties,"{""expression"": ""AC_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""6c9c239c-dbf1-4ab7-9fdb-be272d5814e2"", ""description"": ""Design value of material properties as indicated in Eurocode."", ""parent_uuid"": ""62f5d44b-46e6-4795-9602-f81ec6ea8c32"", ""uuid"": ""6c9c239c-dbf1-4ab7-9fdb-be272d5814e2"", ""type"": ""equation"", ""variables"": []}",[] +36,2.4.3,Design resistances,"{""expression"": ""R_{d} = \\\\frac{R_{k}}{\\\\gamma_{M}} = \\\\frac{1}{\\\\gamma_{M}} R_{k}(\\\\eta_{1} X_{k, 1} ; \\\\eta_{i} X_{k, i} ; a_{d})"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""description"": ""Design resistance for steel structures as per EN 1990."", ""parent_uuid"": ""6d9634e0-07ca-4238-b4ab-21181c06ebc1"", ""uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""type"": ""equation""}","[{""unit"": ""N"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""bb4afe6b-45d0-4092-b592-f55b903bc397"", ""description"": ""Characteristic value of the particular resistance determined with characteristic or nominal values for the material properties and dimensions"", ""name"": ""R_k"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""bb4afe6b-45d0-4092-b592-f55b903bc397"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""1bda5452-168e-4894-859c-1b4f63fe689c"", ""description"": ""Global partial factor for the particular resistance"", ""name"": ""\\\\gamma_{M}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""1bda5452-168e-4894-859c-1b4f63fe689c"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""8786a087-efc7-4dfa-b1d2-d9fcca727c47"", ""description"": ""Parameter related to the resistance"", ""name"": ""\\\\eta_{1}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""8786a087-efc7-4dfa-b1d2-d9fcca727c47"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""db075c65-76ef-4081-b689-e00fe502581d"", ""description"": ""Parameter related to the resistance"", ""name"": ""\\\\eta_{i}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""db075c65-76ef-4081-b689-e00fe502581d"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""255b88d1-b2a1-4f5c-83e4-b8947b76a30e"", ""description"": ""Characteristic value related to the first parameter"", ""name"": ""X_{k, 1}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""255b88d1-b2a1-4f5c-83e4-b8947b76a30e"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""b84b73b0-ad29-4121-ab0b-12bfc55bc54a"", ""description"": ""Characteristic value related to the i-th parameter"", ""name"": ""X_{k, i}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""b84b73b0-ad29-4121-ab0b-12bfc55bc54a"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""471bbd0b-db2c-4760-84f1-dae70d957ead"", ""description"": ""Additional parameter related to the design resistance"", ""name"": ""a_{d}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""471bbd0b-db2c-4760-84f1-dae70d957ead"", ""type"": ""variable""}]" diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..03457e4 --- /dev/null +++ b/examples/__init__.py @@ -0,0 +1,32 @@ +# examples/example.py +import os +import pandas as pd +import sys +from pathlib import Path + +# Add the project root to the Python path +project_root = Path(__file__).resolve().parent.parent +sys.path.append(str(project_root)) + +from src.pandas_query import PandasQuery + +# Data +data = [ + ('John Doe', 25, 50), + ('Jane Smith', 38, 70), + ('Alex Johnson', 45, 80), + ('Jessica Brown', 60, 40), + ('Michael Davis', 22, 90), +] +df = pd.DataFrame(data, columns=['name', 'age', 'donation']) + +# Create query executor +querier = PandasQuery() + +# Execute query +query = "What is the average donation of people older than 40 who donated more than $50?" +result = querier.execute(df, query) + +print(f"Query: {query}") +print(f"Generated code: {querier.last_code}") +print(f"Result: {result}") \ No newline at end of file diff --git a/examples/example-chatbot.py b/examples/example-chatbot.py deleted file mode 100644 index e07628b..0000000 --- a/examples/example-chatbot.py +++ /dev/null @@ -1,80 +0,0 @@ -import os -import pandas as pd - -import sys -from pathlib import Path -sys.path.append(str(Path(__file__).resolve().parent.parent)) - -from src import PandasLLM - -# Data -# Please note that these names, ages, and donations are randomly generated and do not correspond to real individuals or their donations. -data = [('John Doe', 25, 50), - ('Jane Smith', 38, 70), - ('Alex Johnson', 45, 80), - ('Jessica Brown', 60, 40), - ('Michael Davis', 22, 90), - ('Emily Wilson', 30, 60), - ('Daniel Taylor', 35, 75), - ('Sophia Moore', 40, 85), - ('David Thomas', 50, 65), - ('Olivia Jackson', 29, 55), - ('Carlos García', 22, 50), - ('Ana Rodriguez', 38, 70), - ('Luis Hernandez', 45, 80), - ('Sofia Martinez', 60, 40), - ('Miguel Lopez', 22, 90), - ('Isabella Gonzalez', 30, 60), - ('Diego Perez', 35, 75), - ('Maria Sanchez', 40, 85), - ('Juan Pena', 50, 65), - ('Gabriela Ramirez', 29, 55), - ('Giovanni Rossi', 22, 50), - ('Maria Bianchi', 38, 70), - ('Luca Ferrari', 45, 80), - ('Sofia Russo', 60, 40), - ('Francesco Romano', 22, 90), - ('Isabella Colombo', 30, 60), - ('Alessandro Ricci', 35, 75), - ('Giulia Marino', 40, 85), - ('Antonio Greco', 50, 65), - ('Gabriella Bruno', 29, 55)] - -# Create DataFrame -df = pd.DataFrame(data, columns=['name', 'age', 'donation']) - -# Print DataFrame -print(df) - - -def main(): - - # Initialise library and set the OpenAI API key - conv_df = PandasLLM(data=df, llm_api_key = os.environ.get("OPENAI_API_KEY")) - print() - banner = """ - Welcome to the Donation Data CLI. - The donation dataset has three columns (name, age, donation) - Please note that these names, ages, and donations are randomly generated and do not correspond to real individuals or their donations. - - You can ask questions like: - - show me the list of names - - What is the average age of people who donated? - - What is the average donation amount? - - What is the average donation of people older than 30? - - What is the average donation of people older than 30 who donated more than $50? - """ - print(banner) - - while True: - prompt = input("Enter your query (or 'exit' to quit): ") - if prompt.lower() == "exit": - break - - result = conv_df.prompt(prompt) - code = conv_df.code_block - print(f"Executing the following expression of type {type(result)}:\n{code}\n\nResult is:\n {result}\n") - - -if __name__ == "__main__": - main() diff --git a/examples/example.py b/examples/example.py index ea8a725..03457e4 100644 --- a/examples/example.py +++ b/examples/example.py @@ -1,34 +1,32 @@ # examples/example.py import os import pandas as pd - import sys from pathlib import Path -sys.path.append(str(Path(__file__).resolve().parent.parent)) -from src.pandas_llm import PandasLLM + +# Add the project root to the Python path +project_root = Path(__file__).resolve().parent.parent +sys.path.append(str(project_root)) + +from src.pandas_query import PandasQuery # Data -# Please note that these names, ages, and donations are randomly generated -# and do not correspond to real individuals or their donations. -data = [('John Doe', 25, 50), - ('Jane Smith', 38, 70), - ('Alex Johnson', 45, 80), - ('Jessica Brown', 60, 40), - ('Michael Davis', 22, 90), - ('Emily Wilson', 30, 60), - ('Daniel Taylor', 35, 75), - ('Sophia Moore', 40, 85), - ('David Thomas', 50, 65), - ('Olivia Jackson', 29, 55)] +data = [ + ('John Doe', 25, 50), + ('Jane Smith', 38, 70), + ('Alex Johnson', 45, 80), + ('Jessica Brown', 60, 40), + ('Michael Davis', 22, 90), +] df = pd.DataFrame(data, columns=['name', 'age', 'donation']) -conv_df = PandasLLM(data=df, llm_api_key = os.environ.get("OPENAI_API_KEY")) -result = conv_df.prompt("What is the average donation of people older than 40 who donated more than $50?") -code = conv_df.code_block +# Create query executor +querier = PandasQuery() -print(f"Executing the following expression of type {type(result)}:\n{code}\n\nResult is:\n {result}\n") -# Executing the following expression of type : -# result = df.loc[(df['age'] > 40) & (df['donation'] > 50), 'donation'].mean() +# Execute query +query = "What is the average donation of people older than 40 who donated more than $50?" +result = querier.execute(df, query) -# Result is: -# 72.5 +print(f"Query: {query}") +print(f"Generated code: {querier.last_code}") +print(f"Result: {result}") \ No newline at end of file diff --git a/pyvenv.cfg b/pyvenv.cfg deleted file mode 100644 index f18227c..0000000 --- a/pyvenv.cfg +++ /dev/null @@ -1,3 +0,0 @@ -home = /usr/local/opt/python@3.9/bin -include-system-site-packages = false -version = 3.9.16 diff --git a/src/requirements.txt b/requirements.txt similarity index 100% rename from src/requirements.txt rename to requirements.txt diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 224a779..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -description-file = README.md \ No newline at end of file diff --git a/setup.py b/setup.py index b8cb8b6..440892c 100644 --- a/setup.py +++ b/setup.py @@ -1,55 +1,14 @@ from setuptools import setup, find_packages -# Reads the content of your README.md into a variable to be used in the setup below -with open("README.md", "r", encoding="utf-8") as fh: - long_description = fh.read() - setup( - name='src', # should match the package folder - version='0.0.6', # important for updates - license='MIT', # should match your chosen license - description='Conversational Pandas Dataframes', - long_description=long_description, # loads your README.md - long_description_content_type="text/markdown", # README.md is of type 'markdown' - author='DashyDash', - author_email='alessio@dashydash.com', - url='https://github.com/DashyDashOrg/pandas-llm', - project_urls = { # Optional - "Bug Tracker": "https://github.com/DashyDashOrg/pandas-llm/issues" - }, - keywords=["pypi", "pandas-llm", "pandas", "llm", "ai", "openai", "chatgpt"], #descriptive meta-data + name="pandas-llm", + version="0.1.0", packages=find_packages(), - classifiers=[ # https://pypi.org/classifiers - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3', - "Operating System :: OS Independent", - ], - python_requires='>=3.6', install_requires=[ - "aiohttp", - "aiosignal", - "async-timeout", - "attrs", - "certifi", - "charset-normalizer", - "frozenlist", - "idna", - "multidict", + "pandas", "numpy", "openai", - "pandas", - "python-dateutil", - "pytz", - "requests", "RestrictedPython", - "six", - "tqdm", - "tzdata", - "urllib3", - "yarl", - ], - download_url="https://github.com/DashyDashOrg/pandas-llm/releases/tag/v0.0.6", + ], + python_requires=">=3.7", ) \ No newline at end of file diff --git a/src/pandas_llm.py b/src/pandas_llm.py deleted file mode 100644 index 189c5d3..0000000 --- a/src/pandas_llm.py +++ /dev/null @@ -1,293 +0,0 @@ -# src/pandas_llm.py -import datetime -import numpy as np -from openai import OpenAI -import os -import re -import json -import pandas as pd -from dotenv import load_dotenv -load_dotenv() - -from src.sandbox import Sandbox - -# initialise the openai client -client = OpenAI() - -class PandasLLM(pd.DataFrame): - """ - PandasLLM is a subclass of the Pandas DataFrame class. It is designed to provide a - wrapper around the OpenAI API. - """ - - def __init__(self, - data, - llm_engine: str = "openai", - llm_kwargs={"model": "gpt-4o-mini", - "temperature": 0.2}, - prompt_override: bool = False, - custom_prompt: str = "", - path: str = None, - verbose: bool = False, - data_privacy: bool = True, - llm_api_key: str = None, - force_sandbox: bool = False, - *args, **kwargs): - """ - This is the constructor for the PandasLLM class. It takes in the following arguments: - data: The data to be used. It can be a Pandas DataFrame, a list of lists, a list of tuples, - a list of dictionaries, a dictionary, a string, or a list. - llm_engine: The name of the OpenAI engine to use. - llm_kwargs: A dictionary of parameters to be used with the OpenAI API. - prompt_override: A boolean that determines whether or not the prompt is overridden. - custom_prompt: A string that overrides the prompt. - path: The path to the file to be used. - verbose: A boolean that determines whether or not the output is verbose. - data_privacy: A boolean that determines whether or not the data is private. - llm_api_key: The OpenAI API key to be used. - force_sandbox: if False and the sandbox fails, it will retry using eval (less safe) - - The constructor also calls the parent class's constructor. - - - Args: - data (pandas dataframe, mandatory): dataset to query. Defaults to None. - llm_engine (str, optional): LLM engine, currently only OpenAI is supported. Defaults to "openai". - llm_kwargs (dict, optional): LLM engine parameters. Defaults to model=gpt-3.5-turbo and temperature=0.2". - prompt_override (bool, optional): if True, the custom prompt is mandatory and it will became the main prompt. Defaults to False. - custom_prompt (str, optional): if prompt_override is False, the custom prompt will be added to the default src prompt. Defaults to "". - path (str, optional): the path where the files containing debug data will be save. Defaults to None. - verbose (bool, optional): if True debugging info will be printed. Defaults to False. - data_privacy (bool, optional): if True, the function will not send the data content to OpenAI. Defaults to True. - llm_api_key (str, optional): the Open API key. Defaults to None. - force_sandbox (bool, optional): if False and the sandbox fails, it will retry using eval (less safe). Defaults to False. - """ - - super().__init__(data, *args, **kwargs) - - self.llm_kwargs = llm_kwargs or {} - - # Set up OpenAI API key from the environment or the config - self.llm_api_key = llm_api_key or os.environ.get("OPENAI_API_KEY") - - self.llm_engine = llm_engine - self.llm_kwargs = llm_kwargs or {} - self.model = self.llm_kwargs.get("model") - self.temperature = self.llm_kwargs.get("temperature") - - self.prompt_override = prompt_override - self.custom_prompt = custom_prompt - - self.data_privacy = data_privacy - self.path = path - self.verbose = verbose - self.force_sandbox = force_sandbox - self.code_blocks = [r'```python(.*?)```', r'```(.*?)```'] - - def _buildPromptForRole(self): - prompt_role = f""" -I want you to act as a data scientist and Python coder. I want you code for me. -I have a dataset of {len(self)} rows and {len(self.columns)} columns. -Columns and their type are the following: - """ - - for col in self.columns: - col_type = self.dtypes[col] - prompt_role += f"{col} ({col_type})\n" - - return prompt_role - - def _buildPromptForProblemSolving(self, request): - - if self.prompt_override: - return self.custom_prompt - - columns = "" - for col in self.columns: - col_type = self.dtypes[col] - columns += f"{col} ({col_type})\n" - - prompt_problem = f""" -Given a DataFrame named 'df' of {len(self)} rows and {len(self.columns)} columns, -Its columns are the following: - -{columns} - -I want you to solve the following problem: -write a Python code snippet that addresses the following request: -{request} - -While crafting the code, please follow these guidelines: -1. When comparing or searching for strings, use lower case letters, ignore case sensitivity, and apply a "contains" search. -2. Ensure that the answer is a single line of code without explanations, comments, or additional details. -3. If a single line solution is not possible, multiline solutions or functions are acceptable, but the code must end with an assignment to the variable 'result'. -4. Assign the resulting code to the variable 'result'. -5. Avoid importing any additional libraries than pandas and numpy. - -""" - if not self.custom_prompt is None and len(self.custom_prompt) > 0: - prompt_problem += f""" - Also: - {self.custom_prompt} - """ - - return prompt_problem - - def _extractPythonCode(self, text: str, regexp: str) -> str: - # Define the regular expression pattern for the Python code block - pattern = regexp - - # Search for the pattern in the input text - match = re.search(pattern, text, re.DOTALL) - - # If a match is found, return the extracted code (without the markers) - if match: - return match.group(1).strip() - - # If no match is found, return an empty string - return "" - - def _print(self, *args, **kwargs): - if self.verbose: - print(*args, **kwargs) - - # def _variable_to_string(self, variable): - # if variable is None: return None - # try: - - # if isinstance(variable, pd.Series): - # # convert to dataframe - # variable = variable.to_frame() - - # if isinstance(variable, pd.DataFrame): - # variable = variable.drop_duplicates() - # if len(variable) == 0: return None - # return str(variable) - - # elif isinstance(variable, np.ndarray): - # if len(variable) == 0: return None - # return np.array2string(variable) - # else: - # # Convert the variable to a string - # return str(variable) - # except Exception as e: - # return str(variable) - - def _save(self, name, value): - if self.path is None or self.path == "": - return - try: - with open(f"{self.path}/{name}", 'w') as file: - file.write(value) - except Exception as e: - self._print(f"error {e}") - return - - def _execInSandbox(self, df, generated_code: str): - - # Create a Sandbox instance and allow pandas to be imported - sandbox = Sandbox() - sandbox.allow_import("pandas") - sandbox.allow_import("numpy") - - # Define the initial code to set up the DataFrame - initial_code = f""" -import pandas as pd -import datetime -from pandas import Timestamp -import numpy as np - - """ - - # Combine the initial code and the generated code - full_code = initial_code + "\n" + generated_code - - self._save("temp/prompt_code.py", full_code) - # Execute the combined code in the Sandbox - sandbox_result = sandbox.execute(full_code, {"df": df}) - - # Get the result from the local_vars dictionary - result = sandbox_result.get("result") - return result - - def prompt(self, request: str): - """ - - Args: - request (str): prompt containing the request. it must be expressed as a question or a problem to solve - - Returns: - Any: contains the result or solution of the problem. Tipically the result data type is a dataframe, a Series or a float - """ - - - - self.llm_kwargs['messages'] = [ - {"role": "system", - "content": self._buildPromptForRole()}, - {"role": "user", - "content": self._buildPromptForProblemSolving(request) - } - ] - - response = None - for times in range(0, 3): - try: - response = client.chat.completions.create( - **self.llm_kwargs - ) - break; - except Exception as e: - self._print(f"error {e}") - continue - - if response is None: - return "Please try later" - - self._save("temp/prompt_cmd.json", json.dumps(self.llm_kwargs['messages'], indent=4)) - - generated_code = response.choices[0].message.content - if generated_code == "" or generated_code is None: - self.code_block = "" - return None - - self.code_block = generated_code - - results = [] - for regexp in self.code_blocks: - cleaned_code = self._extractPythonCode(generated_code, regexp) - if cleaned_code == "" or cleaned_code is None: - continue - results.append(cleaned_code) - results.append(generated_code) - - if len(results) == 0: - return None - - result = None - for cleaned_code in results: - - try: - result = self._execInSandbox(self, cleaned_code) - except Exception as e: - self._print(f"error {e}") - if not self.force_sandbox: - try: - expression = re.sub(r"^\s*result\s*=", "", cleaned_code).strip() - result = eval(expression, - {'df': self, 'pd': pd, 'np': np, 'datetime': datetime, 'result': result}) - except Exception as e: - self._print(f"error {e}") - pass - - if result is not None and str(result) != "": - break - - if self.data_privacy == True: - # non formatted result - return result - - # currently the privacy option is not needed. - # in the future, we can choose to send data to LLM if privacy is set to false - - return result \ No newline at end of file diff --git a/src/pandas_query.py b/src/pandas_query.py new file mode 100644 index 0000000..73112ba --- /dev/null +++ b/src/pandas_query.py @@ -0,0 +1,120 @@ +from openai import OpenAI +import pandas as pd +import numpy as np +from typing import Any, Optional, Dict +import os +from RestrictedPython import compile_restricted +from RestrictedPython.Guards import safe_builtins, guarded_iter_unpack_sequence +from RestrictedPython.Eval import default_guarded_getattr, default_guarded_getitem, default_guarded_getiter + + +class PandasQuery: + """ + A streamlined class for executing natural language queries on pandas DataFrames using OpenAI's LLM, + with sandbox protection. + """ + + def __init__( + self, + model: str = "gpt-4", + temperature: float = 0.2, + api_key: Optional[str] = None + ): + self.client = OpenAI(api_key=api_key or os.getenv("OPENAI_API_KEY")) + self.model = model + self.temperature = temperature + self.last_code = None + + # Set up sandbox environment + self.restricted_globals = { + "__builtins__": dict(safe_builtins), + "pd": pd, + "np": np, + "_getattr_": default_guarded_getattr, + "_getitem_": default_guarded_getitem, + "_getiter_": default_guarded_getiter, + "_iter_unpack_sequence_": guarded_iter_unpack_sequence, + } + + # Add safe pandas Series methods + series_methods = [ + "sum", "mean", "any", "argmax", "argmin", "count", "cumsum", + "diff", "dropna", "fillna", "head", "idxmax", "idxmin", + "max", "min", "notna", "prod", "quantile", "rename", "round", + "tail", "to_frame", "to_list", "to_numpy", "unique", + "sort_index", "sort_values", "aggregate" + ] + self.restricted_globals.update({ + method: getattr(pd.Series, method) for method in series_methods + }) + + def _build_prompt(self, df: pd.DataFrame, query: str) -> str: + columns_info = "\n".join(f"- {col} ({df[col].dtype})" for col in df.columns) + + return f"""Given a pandas DataFrame with {len(df)} rows and the following columns: +{columns_info} + +Write a single line of Python code that answers this question: {query} + +Guidelines: +- Use only pandas and numpy operations +- String comparisons should be case-insensitive +- Assign the result to a variable named 'result' +- Return only the code, no explanations +""" + + def _execute_in_sandbox(self, code: str, df: pd.DataFrame) -> Any: + """Execute code in RestrictedPython sandbox""" + try: + # Compile the code in restricted mode + byte_code = compile_restricted( + source=code, + filename='', + mode='exec' + ) + + # Create local namespace with just the dataframe + local_vars = {'df': df, 'result': None} + + # Execute in sandbox + exec(byte_code, self.restricted_globals, local_vars) + + return local_vars['result'] + + except Exception as e: + raise RuntimeError(f"Sandbox execution failed. Code: {code}. Error: {str(e)}") + + def execute(self, df: pd.DataFrame, query: str) -> Any: + """ + Execute a natural language query on a pandas DataFrame using sandbox protection. + + Args: + df: The pandas DataFrame to query + query: Natural language query string + + Returns: + Query result (could be DataFrame, Series, scalar, etc.) + """ + # Get code from LLM + response = self.client.chat.completions.create( + model=self.model, + temperature=self.temperature, + messages=[ + {"role": "user", "content": self._build_prompt(df, query)} + ] + ) + + code = response.choices[0].message.content.strip() + + # Clean up code if it's in a code block + if code.startswith("```"): + code = code.split("\n", 1)[1].rsplit("\n", 1)[0] + if code.startswith("python"): + code = code.split("\n", 1)[1] + code = code.strip("` \n") + + # Store for reference + self.last_code = code + + # Execute in sandbox + return self._execute_in_sandbox(code, df) \ No newline at end of file diff --git a/src/sandbox.py b/src/sandbox.py deleted file mode 100644 index 993a646..0000000 --- a/src/sandbox.py +++ /dev/null @@ -1,53 +0,0 @@ -# src/sandbox.py -import pandas as pd -from RestrictedPython import compile_restricted -from RestrictedPython.Guards import safe_builtins,guarded_iter_unpack_sequence -from RestrictedPython.Eval import default_guarded_getattr, default_guarded_getitem, default_guarded_getiter - -class Sandbox: - def __init__(self): - self._allowed_imports = {} - - def allow_import(self, module_name): - try: - module = __import__(module_name) - self._allowed_imports[module_name] = module - except ImportError: - pass - - def execute(self, code, local_vars = {}): - allowed_builtins = safe_builtins - # Add __builtins__, __import__, and allowed imports to the globals - restricted_globals = {"__builtins__": allowed_builtins} - restricted_globals.update(self._allowed_imports) - - builtin_mappings = { - "__import__": __import__, - "_getattr_": default_guarded_getattr, - "_getitem_": default_guarded_getitem, - "_getiter_": default_guarded_getiter, - "_iter_unpack_sequence_": guarded_iter_unpack_sequence, - "list": list, - "set": set, - "pd": pd, - } - - series_methods = [ - "sum", "mean", "any", "argmax", "argmin", "count", "cumsum", "cumprod", "diff", - "dropna", "fillna", "head", "idxmax", "idxmin", "last", "max", "min", "notna", - "prod", "quantile", "rename", "round", "tail", "to_frame", "to_list", "to_numpy", - "to_string","unique", "sort_index", "sort_values", "aggregate" - ] - - - builtin_mappings.update({method: getattr(pd.Series, method) for method in series_methods}) - - restricted_globals["__builtins__"].update(builtin_mappings) - - byte_code = compile_restricted(source=code, filename='', mode='exec') - - # Execute the restricted code - exec(byte_code, restricted_globals, local_vars) - - return local_vars - diff --git a/update_requirements.py b/update_requirements.py deleted file mode 100644 index 5a352e6..0000000 --- a/update_requirements.py +++ /dev/null @@ -1,13 +0,0 @@ -import subprocess - -def install_or_update_packages(requirements_file: str): - with open(requirements_file, 'r') as file: - packages = file.readlines() - - for package in packages: - package_name = package.strip() - if package_name: - subprocess.run(['pip', 'install', '--upgrade', package_name]) - -# Example usage -install_or_update_packages('src/requirements.txt') \ No newline at end of file From 116c0a83dbce7a59d025880dd689973513ca1047 Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 12:29:23 +0000 Subject: [PATCH 3/9] pandas_query.py update --- src/pandas_query.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/pandas_query.py b/src/pandas_query.py index 73112ba..d293bd7 100644 --- a/src/pandas_query.py +++ b/src/pandas_query.py @@ -1,7 +1,8 @@ +# src/pandas_query.py from openai import OpenAI import pandas as pd import numpy as np -from typing import Any, Optional, Dict +from typing import Any, Optional import os from RestrictedPython import compile_restricted from RestrictedPython.Guards import safe_builtins, guarded_iter_unpack_sequence @@ -11,7 +12,7 @@ class PandasQuery: """ A streamlined class for executing natural language queries on pandas DataFrames using OpenAI's LLM, - with sandbox protection. + with sandbox protection and NaN handling. """ def __init__( @@ -42,7 +43,7 @@ def __init__( "diff", "dropna", "fillna", "head", "idxmax", "idxmin", "max", "min", "notna", "prod", "quantile", "rename", "round", "tail", "to_frame", "to_list", "to_numpy", "unique", - "sort_index", "sort_values", "aggregate" + "sort_index", "sort_values", "aggregate", "isna", "fillna" ] self.restricted_globals.update({ method: getattr(pd.Series, method) for method in series_methods @@ -58,9 +59,16 @@ def _build_prompt(self, df: pd.DataFrame, query: str) -> str: Guidelines: - Use only pandas and numpy operations -- String comparisons should be case-insensitive +- Always handle NaN/null values in string operations using fillna('') +- For string operations, first handle NaN values, then do the comparison +- For case-insensitive string comparisons, use .str.lower() - Assign the result to a variable named 'result' - Return only the code, no explanations + +Example of correct string operations with NaN handling: +- Wrong: df['col'].str.startswith('prefix') # will fail on NaN +- Right: df['col'].fillna('').str.startswith('prefix') # handles NaN +- Right: df['col'].fillna('').str.lower().str.startswith('prefix') # case-insensitive """ def _execute_in_sandbox(self, code: str, df: pd.DataFrame) -> Any: From 2ec83b3f082778de227849ff2f9bfa6d2eb868b7 Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 12:50:24 +0000 Subject: [PATCH 4/9] pandas_query.py working with validator --- src/pandas_query.py | 126 +++++++++++++++++----- src/pandas_validator.py | 227 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 325 insertions(+), 28 deletions(-) create mode 100644 src/pandas_validator.py diff --git a/src/pandas_query.py b/src/pandas_query.py index d293bd7..0f57b7d 100644 --- a/src/pandas_query.py +++ b/src/pandas_query.py @@ -1,30 +1,32 @@ -# src/pandas_query.py from openai import OpenAI import pandas as pd import numpy as np -from typing import Any, Optional +from typing import Any, Optional, Dict import os from RestrictedPython import compile_restricted from RestrictedPython.Guards import safe_builtins, guarded_iter_unpack_sequence from RestrictedPython.Eval import default_guarded_getattr, default_guarded_getitem, default_guarded_getiter +from .pandas_validator import validate_pandas_query +import logging + +logger = logging.getLogger(__name__) class PandasQuery: - """ - A streamlined class for executing natural language queries on pandas DataFrames using OpenAI's LLM, - with sandbox protection and NaN handling. - """ + """A streamlined class for executing natural language queries on pandas DataFrames using OpenAI's LLM.""" def __init__( self, model: str = "gpt-4", temperature: float = 0.2, - api_key: Optional[str] = None + api_key: Optional[str] = None, + validate: bool = True ): self.client = OpenAI(api_key=api_key or os.getenv("OPENAI_API_KEY")) self.model = model self.temperature = temperature self.last_code = None + self.validate = validate # Set up sandbox environment self.restricted_globals = { @@ -38,7 +40,7 @@ def __init__( } # Add safe pandas Series methods - series_methods = [ + self.series_methods = [ "sum", "mean", "any", "argmax", "argmin", "count", "cumsum", "diff", "dropna", "fillna", "head", "idxmax", "idxmin", "max", "min", "notna", "prod", "quantile", "rename", "round", @@ -46,34 +48,87 @@ def __init__( "sort_index", "sort_values", "aggregate", "isna", "fillna" ] self.restricted_globals.update({ - method: getattr(pd.Series, method) for method in series_methods + method: getattr(pd.Series, method) for method in self.series_methods }) def _build_prompt(self, df: pd.DataFrame, query: str) -> str: - columns_info = "\n".join(f"- {col} ({df[col].dtype})" for col in df.columns) + """ + Build a detailed prompt for the LLM that includes DataFrame information + and guidelines for generating safe, valid code. + """ + # Get detailed column information + column_info = [] + for col in df.columns: + dtype = df[col].dtype + null_count = df[col].isna().sum() + unique_count = df[col].nunique() + sample = str(df[col].iloc[:3].tolist()) + + column_info.append( + f"- {col} ({dtype}):\n" + f" * Null values: {null_count}\n" + f" * Unique values: {unique_count}\n" + f" * Sample values: {sample}" + ) + + column_details = "\n".join(column_info) + + # Build comprehensive prompt + prompt = f"""Given a pandas DataFrame with {len(df)} rows and the following columns: - return f"""Given a pandas DataFrame with {len(df)} rows and the following columns: -{columns_info} +{column_details} Write a single line of Python code that answers this question: {query} Guidelines: -- Use only pandas and numpy operations -- Always handle NaN/null values in string operations using fillna('') -- For string operations, first handle NaN values, then do the comparison -- For case-insensitive string comparisons, use .str.lower() -- Assign the result to a variable named 'result' -- Return only the code, no explanations - -Example of correct string operations with NaN handling: -- Wrong: df['col'].str.startswith('prefix') # will fail on NaN -- Right: df['col'].fillna('').str.startswith('prefix') # handles NaN -- Right: df['col'].fillna('').str.lower().str.startswith('prefix') # case-insensitive +1. Basic Requirements: + - Use only pandas and numpy operations + - Assign the result to a variable named 'result' + - Return only the code, no explanations + +2. String Operations: + - Always handle null values before string operations using fillna('') + - Use str.lower() for case-insensitive comparisons + - Use str.contains() instead of direct string matching when appropriate + +3. Data Type Considerations: + - Use appropriate methods for each data type + - For dates: Use dt accessor for date components + - For numbers: Use appropriate numeric operations + - For strings: Use str accessor methods + +4. Null Value Handling: + - Always consider null values in your operations + - Use fillna() or dropna() as appropriate + - For string operations, use fillna('') before the operation + +5. Available Series Methods: + {', '.join(self.series_methods)} + +Example valid patterns: +- result = df[df['text_column'].fillna('').str.lower().str.contains('pattern')] +- result = df.groupby('column')['value'].mean() +- result = df[df['number'] > df['number'].mean()] """ + return prompt def _execute_in_sandbox(self, code: str, df: pd.DataFrame) -> Any: - """Execute code in RestrictedPython sandbox""" + """ + Execute code in RestrictedPython sandbox with comprehensive error handling + and safety checks. + """ try: + # Pre-execution validation + if self.validate: + validation_result = validate_pandas_query(df, code, logger) + if not validation_result['is_valid']: + logger.warning("Pre-execution validation failed:") + for error in validation_result['errors']: + logger.warning(f"- {error}") + if validation_result['suggested_correction']: + logger.info("Using suggested correction") + code = validation_result['suggested_correction'] + # Compile the code in restricted mode byte_code = compile_restricted( source=code, @@ -87,14 +142,26 @@ def _execute_in_sandbox(self, code: str, df: pd.DataFrame) -> Any: # Execute in sandbox exec(byte_code, self.restricted_globals, local_vars) - return local_vars['result'] + # Post-execution type checking + result = local_vars['result'] + if result is None: + raise ValueError("Execution produced no result") + + # Log successful execution + logger.info(f"Successfully executed code: {code}") + logger.info(f"Result type: {type(result)}") + + return result except Exception as e: - raise RuntimeError(f"Sandbox execution failed. Code: {code}. Error: {str(e)}") + error_msg = f"Sandbox execution failed. Code: {code}. Error: {str(e)}" + logger.error(error_msg) + raise RuntimeError(error_msg) def execute(self, df: pd.DataFrame, query: str) -> Any: """ - Execute a natural language query on a pandas DataFrame using sandbox protection. + Execute a natural language query on a pandas DataFrame using sandbox protection + and validation. Args: df: The pandas DataFrame to query @@ -103,6 +170,8 @@ def execute(self, df: pd.DataFrame, query: str) -> Any: Returns: Query result (could be DataFrame, Series, scalar, etc.) """ + logger.info(f"Executing query: {query}") + # Get code from LLM response = self.client.chat.completions.create( model=self.model, @@ -114,7 +183,7 @@ def execute(self, df: pd.DataFrame, query: str) -> Any: code = response.choices[0].message.content.strip() - # Clean up code if it's in a code block + # Clean up code if code.startswith("```"): code = code.split("\n", 1)[1].rsplit("\n", 1)[0] if code.startswith("python"): @@ -123,6 +192,7 @@ def execute(self, df: pd.DataFrame, query: str) -> Any: # Store for reference self.last_code = code + logger.info(f"Generated code: {code}") # Execute in sandbox return self._execute_in_sandbox(code, df) \ No newline at end of file diff --git a/src/pandas_validator.py b/src/pandas_validator.py new file mode 100644 index 0000000..287289e --- /dev/null +++ b/src/pandas_validator.py @@ -0,0 +1,227 @@ +import re +import pandas as pd +import numpy as np +from typing import Dict, List, Set, Tuple, Optional +import logging + + + + +class PandasQueryValidator: + """Validates pandas query operations and provides suggestions for corrections.""" + + def __init__(self, df: pd.DataFrame, logger: logging.Logger): + """Initialize validator with DataFrame schema information.""" + self.dtypes = df.dtypes.to_dict() + self.columns = set(df.columns) + + # Valid pandas operations by data type + self.valid_operations = { + 'object': { + 'string_ops': {'contains', 'startswith', 'endswith', 'lower', 'upper', 'strip', 'len'}, + 'comparisons': {'==', '!=', 'isin'} + }, + 'number': { + 'numeric_ops': {'sum', 'mean', 'min', 'max', 'count', 'median'}, + 'comparisons': {'>', '<', '>=', '<=', '==', '!='} + }, + 'datetime': { + 'date_ops': {'year', 'month', 'day', 'hour', 'minute'}, + 'comparisons': {'>', '<', '>=', '<=', '==', '!='} + }, + 'bool': { + 'bool_ops': {'any', 'all'}, + 'comparisons': {'==', '!='} + } + } + + # Common pandas aggregation functions + self.valid_aggregations = { + 'sum', 'mean', 'median', 'min', 'max', 'count', + 'std', 'var', 'first', 'last' + } + + # Valid pandas commands and their requirements + self.valid_commands = { + 'groupby': {'columns'}, + 'agg': {'groupby'}, + 'sort_values': {'columns'}, + 'fillna': {'value'}, + 'dropna': set(), + 'reset_index': set(), + 'merge': {'right', 'on', 'how'}, + 'join': {'on'}, + 'head': set(), + 'tail': set() + } + self.logger = logger + self.logger.info("PandasQueryValidator initialized successfully") + + def _extract_column_references(self, code: str) -> List[str]: + """Extract column references from the code.""" + # Match patterns like df['column'] or df.column + pattern = r"df[\['](\w+)[\]']|df\.(\w+)" + matches = re.findall(pattern, code) + # Flatten and filter matches + columns = {match[0] or match[1] for match in matches} + self.logger.debug(f"Extracted columns: {columns}") + return list(columns) + + def _extract_operations(self, code: str) -> List[str]: + """Extract pandas operations from the code.""" + # Match method calls on df or column references + pattern = r'\.(\w+)\(' + operations = re.findall(pattern, code) + self.logger.debug(f"Extracted operations: {operations}") + return operations + + def _check_column_existence(self, code: str) -> List[str]: + """Check if all referenced columns exist in the DataFrame.""" + errors = [] + referenced_columns = self._extract_column_references(code) + + for col in referenced_columns: + if col not in self.columns: + error_msg = f"Column '{col}' does not exist in DataFrame" + errors.append(error_msg) + self.logger.warning(error_msg) + + return errors + + def _check_operation_compatibility(self, code: str) -> List[str]: + """Check if operations are compatible with column data types.""" + errors = [] + operations = self._extract_operations(code) + column_refs = self._extract_column_references(code) + + for col in column_refs: + if col not in self.columns: + continue + + dtype = self.dtypes[col] + dtype_category = 'number' if pd.api.types.is_numeric_dtype(dtype) else \ + 'datetime' if pd.api.types.is_datetime64_dtype(dtype) else \ + 'bool' if pd.api.types.is_bool_dtype(dtype) else 'object' + + valid_ops = set() + if dtype_category in self.valid_operations: + for ops in self.valid_operations[dtype_category].values(): + valid_ops.update(ops) + + for op in operations: + if op not in valid_ops and op not in self.valid_commands: + error_msg = f"Operation '{op}' may not be compatible with column '{col}' of type {dtype}" + errors.append(error_msg) + self.logger.warning(error_msg) + + return errors + + def _check_null_handling(self, code: str) -> List[str]: + """Check for proper null value handling.""" + errors = [] + + # Check for string operations without null handling + if any(op in code for op in ['.str.', '.dt.']): + if 'fillna' not in code and 'dropna' not in code: + error_msg = "String or datetime operations detected without null handling" + errors.append(error_msg) + self.logger.warning(error_msg) + + return errors + + def _check_aggregation_usage(self, code: str) -> List[str]: + """Check for valid aggregation function usage.""" + errors = [] + operations = self._extract_operations(code) + + for op in operations: + if op.lower() in self.valid_aggregations: + # Check if groupby is used before aggregation + if 'groupby' not in code and not any(c in code for c in ['sum()', 'mean()', 'count()']): + error_msg = f"Aggregation '{op}' used without groupby" + errors.append(error_msg) + self.logger.warning(error_msg) + + return errors + + def suggest_corrections(self, code: str) -> Optional[str]: + """Attempt to suggest corrections for common issues.""" + self.logger.info("Attempting to suggest corrections") + + corrected = code + + # Fix column name case sensitivity + for col in self._extract_column_references(code): + if col not in self.columns: + for actual_col in self.columns: + if col.lower() == actual_col.lower(): + corrected = corrected.replace(f"['{col}']", f"['{actual_col}']") + corrected = corrected.replace(f".{col}", f".{actual_col}") + self.logger.info(f"Suggested correction for column name: {col} -> {actual_col}") + + # Add null handling for string operations + if '.str.' in corrected and 'fillna' not in corrected: + corrected = corrected.replace('.str.', '.fillna("").str.') + self.logger.info("Added null handling for string operations") + + # Suggest proper aggregation syntax + if any(op in corrected for op in self.valid_aggregations) and 'groupby' not in corrected: + self.logger.info("Suggested using groupby before aggregation") + + if corrected != code: + return corrected + return None + + def validate_query(self, code: str) -> Tuple[bool, List[str]]: + """ + Validate a pandas query code. + + Returns: + Tuple[bool, List[str]]: (is_valid, list_of_errors) + """ + errors = [] + self.logger.info("Starting pandas query validation") + + # Run all checks + errors.extend(self._check_column_existence(code)) + errors.extend(self._check_operation_compatibility(code)) + errors.extend(self._check_null_handling(code)) + errors.extend(self._check_aggregation_usage(code)) + + is_valid = len(errors) == 0 + if is_valid: + self.logger.info("Query is valid") + else: + self.logger.info("Query validation failed with errors") + + return is_valid, errors + + +def validate_pandas_query(df: pd.DataFrame, code: str, logger: logging.Logger) -> Dict: + """ + Validate a pandas query and suggest corrections if needed. + + Args: + df: Input DataFrame + code: Pandas query code to validate + + Returns: + Dictionary containing: + - 'code': Original code string + - 'is_valid': Boolean indicating if code is valid + - 'errors': List of validation errors + - 'suggested_correction': Suggested correction string or None + """ + validator = PandasQueryValidator(df, logger) + is_valid, errors = validator.validate_query(code) + suggested_correction = None + + if not is_valid: + suggested_correction = validator.suggest_corrections(code) + + return { + 'code': code.strip(), + 'is_valid': is_valid, + 'errors': errors, + 'suggested_correction': suggested_correction + } \ No newline at end of file From c46644a90c3a2ff1b825c393ab1ee89943a6734e Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 13:16:06 +0000 Subject: [PATCH 5/9] pandas_query.py working with validator - before refactor --- src/pandas_query.py | 129 +++++++++++++++++----------------------- src/pandas_validator.py | 80 +++++++++---------------- 2 files changed, 84 insertions(+), 125 deletions(-) diff --git a/src/pandas_query.py b/src/pandas_query.py index 0f57b7d..7346e6d 100644 --- a/src/pandas_query.py +++ b/src/pandas_query.py @@ -6,10 +6,7 @@ from RestrictedPython import compile_restricted from RestrictedPython.Guards import safe_builtins, guarded_iter_unpack_sequence from RestrictedPython.Eval import default_guarded_getattr, default_guarded_getitem, default_guarded_getiter -from .pandas_validator import validate_pandas_query -import logging - -logger = logging.getLogger(__name__) +from .pandas_validator import PandasQueryValidator class PandasQuery: @@ -39,46 +36,53 @@ def __init__( "_iter_unpack_sequence_": guarded_iter_unpack_sequence, } - # Add safe pandas Series methods + # Core pandas Series methods (excluding string accessor methods) self.series_methods = [ "sum", "mean", "any", "argmax", "argmin", "count", "cumsum", "diff", "dropna", "fillna", "head", "idxmax", "idxmin", "max", "min", "notna", "prod", "quantile", "rename", "round", "tail", "to_frame", "to_list", "to_numpy", "unique", - "sort_index", "sort_values", "aggregate", "isna", "fillna" + "sort_index", "sort_values", "aggregate", "isna", "astype" ] + + # Add series methods to restricted globals self.restricted_globals.update({ method: getattr(pd.Series, method) for method in self.series_methods }) - def _build_prompt(self, df: pd.DataFrame, query: str) -> str: - """ - Build a detailed prompt for the LLM that includes DataFrame information - and guidelines for generating safe, valid code. - """ + def _build_prompt(self, df: pd.DataFrame, query: str, n: int = 5) -> str: + """Build a detailed prompt with DataFrame information and query context.""" # Get detailed column information column_info = [] for col in df.columns: dtype = df[col].dtype null_count = df[col].isna().sum() unique_count = df[col].nunique() - sample = str(df[col].iloc[:3].tolist()) + + # Get appropriate sample values and range info + sample_vals = df[col].sample(min(n, df[col].count())) + if pd.api.types.is_numeric_dtype(dtype): + try: + range_info = f"Range: {df[col].min()} to {df[col].max()}" + except: + range_info = f"Sample values: {list(sample_vals)}" + else: + range_info = f"Sample values: {list(sample_vals)}" column_info.append( f"- {col} ({dtype}):\n" + f" * {range_info}\n" f" * Null values: {null_count}\n" - f" * Unique values: {unique_count}\n" - f" * Sample values: {sample}" + f" * Unique values: {unique_count}" ) - column_details = "\n".join(column_info) - - # Build comprehensive prompt prompt = f"""Given a pandas DataFrame with {len(df)} rows and the following columns: -{column_details} +{chr(10).join(column_info)} + +Write a single line of Python code that answers this question: -Write a single line of Python code that answers this question: {query} +{query} Guidelines: 1. Basic Requirements: @@ -86,47 +90,41 @@ def _build_prompt(self, df: pd.DataFrame, query: str) -> str: - Assign the result to a variable named 'result' - Return only the code, no explanations -2. String Operations: - - Always handle null values before string operations using fillna('') - - Use str.lower() for case-insensitive comparisons - - Use str.contains() instead of direct string matching when appropriate - -3. Data Type Considerations: - - Use appropriate methods for each data type - - For dates: Use dt accessor for date components - - For numbers: Use appropriate numeric operations - - For strings: Use str accessor methods - -4. Null Value Handling: - - Always consider null values in your operations - - Use fillna() or dropna() as appropriate - - For string operations, use fillna('') before the operation - -5. Available Series Methods: - {', '.join(self.series_methods)} - -Example valid patterns: -- result = df[df['text_column'].fillna('').str.lower().str.contains('pattern')] -- result = df.groupby('column')['value'].mean() -- result = df[df['number'] > df['number'].mean()] +2. Type-Specific Operations: + - For numeric operations on string numbers: Use pd.to_numeric(df['column'], errors='coerce') + - For string comparisons: Use .fillna('').str.lower() + - For string pattern matching: Use .str.contains() or .str.startswith() + - For datetime comparisons: Use .dt accessor + +3. Null Handling: + - Always handle null values before operations + - Use fillna() for string operations + - Use dropna() or fillna() for numeric operations + +4. Available Methods: + Core methods: {', '.join(self.series_methods)} + String operations available via .str accessor + DateTime operations available via .dt accessor + +Example patterns: +- String to number comparison: result = df[pd.to_numeric(df['column'], errors='coerce') > 5] +- Case-insensitive search: result = df[df['column'].fillna('').str.lower().str.contains('pattern')] +- Section number filtering: result = df[df['section_number'].fillna('').str.startswith('6')] """ return prompt def _execute_in_sandbox(self, code: str, df: pd.DataFrame) -> Any: - """ - Execute code in RestrictedPython sandbox with comprehensive error handling - and safety checks. - """ + """Execute code in RestrictedPython sandbox with validation.""" try: # Pre-execution validation if self.validate: - validation_result = validate_pandas_query(df, code, logger) + validator = PandasQueryValidator(df) + validation_result = validator.validate_pandas_query(code) if not validation_result['is_valid']: - logger.warning("Pre-execution validation failed:") for error in validation_result['errors']: - logger.warning(f"- {error}") + print(f"Warning: {error}") if validation_result['suggested_correction']: - logger.info("Using suggested correction") + print("Using suggested correction") code = validation_result['suggested_correction'] # Compile the code in restricted mode @@ -136,43 +134,28 @@ def _execute_in_sandbox(self, code: str, df: pd.DataFrame) -> Any: mode='exec' ) - # Create local namespace with just the dataframe - local_vars = {'df': df, 'result': None} + # Create local namespace with DataFrame and numeric conversion function + local_vars = { + 'df': df, + 'result': None, + 'pd': pd + } # Execute in sandbox exec(byte_code, self.restricted_globals, local_vars) - # Post-execution type checking result = local_vars['result'] if result is None: raise ValueError("Execution produced no result") - # Log successful execution - logger.info(f"Successfully executed code: {code}") - logger.info(f"Result type: {type(result)}") - return result except Exception as e: error_msg = f"Sandbox execution failed. Code: {code}. Error: {str(e)}" - logger.error(error_msg) raise RuntimeError(error_msg) def execute(self, df: pd.DataFrame, query: str) -> Any: - """ - Execute a natural language query on a pandas DataFrame using sandbox protection - and validation. - - Args: - df: The pandas DataFrame to query - query: Natural language query string - - Returns: - Query result (could be DataFrame, Series, scalar, etc.) - """ - logger.info(f"Executing query: {query}") - - # Get code from LLM + """Execute a natural language query with validation.""" response = self.client.chat.completions.create( model=self.model, temperature=self.temperature, @@ -190,9 +173,7 @@ def execute(self, df: pd.DataFrame, query: str) -> Any: code = code.split("\n", 1)[1] code = code.strip("` \n") - # Store for reference self.last_code = code - logger.info(f"Generated code: {code}") # Execute in sandbox return self._execute_in_sandbox(code, df) \ No newline at end of file diff --git a/src/pandas_validator.py b/src/pandas_validator.py index 287289e..1900d2d 100644 --- a/src/pandas_validator.py +++ b/src/pandas_validator.py @@ -2,15 +2,13 @@ import pandas as pd import numpy as np from typing import Dict, List, Set, Tuple, Optional -import logging - class PandasQueryValidator: """Validates pandas query operations and provides suggestions for corrections.""" - def __init__(self, df: pd.DataFrame, logger: logging.Logger): + def __init__(self, df: pd.DataFrame): """Initialize validator with DataFrame schema information.""" self.dtypes = df.dtypes.to_dict() self.columns = set(df.columns) @@ -54,8 +52,7 @@ def __init__(self, df: pd.DataFrame, logger: logging.Logger): 'head': set(), 'tail': set() } - self.logger = logger - self.logger.info("PandasQueryValidator initialized successfully") + def _extract_column_references(self, code: str) -> List[str]: """Extract column references from the code.""" @@ -64,7 +61,6 @@ def _extract_column_references(self, code: str) -> List[str]: matches = re.findall(pattern, code) # Flatten and filter matches columns = {match[0] or match[1] for match in matches} - self.logger.debug(f"Extracted columns: {columns}") return list(columns) def _extract_operations(self, code: str) -> List[str]: @@ -72,7 +68,6 @@ def _extract_operations(self, code: str) -> List[str]: # Match method calls on df or column references pattern = r'\.(\w+)\(' operations = re.findall(pattern, code) - self.logger.debug(f"Extracted operations: {operations}") return operations def _check_column_existence(self, code: str) -> List[str]: @@ -84,7 +79,6 @@ def _check_column_existence(self, code: str) -> List[str]: if col not in self.columns: error_msg = f"Column '{col}' does not exist in DataFrame" errors.append(error_msg) - self.logger.warning(error_msg) return errors @@ -112,7 +106,6 @@ def _check_operation_compatibility(self, code: str) -> List[str]: if op not in valid_ops and op not in self.valid_commands: error_msg = f"Operation '{op}' may not be compatible with column '{col}' of type {dtype}" errors.append(error_msg) - self.logger.warning(error_msg) return errors @@ -125,7 +118,6 @@ def _check_null_handling(self, code: str) -> List[str]: if 'fillna' not in code and 'dropna' not in code: error_msg = "String or datetime operations detected without null handling" errors.append(error_msg) - self.logger.warning(error_msg) return errors @@ -140,13 +132,11 @@ def _check_aggregation_usage(self, code: str) -> List[str]: if 'groupby' not in code and not any(c in code for c in ['sum()', 'mean()', 'count()']): error_msg = f"Aggregation '{op}' used without groupby" errors.append(error_msg) - self.logger.warning(error_msg) return errors def suggest_corrections(self, code: str) -> Optional[str]: """Attempt to suggest corrections for common issues.""" - self.logger.info("Attempting to suggest corrections") corrected = code @@ -157,16 +147,10 @@ def suggest_corrections(self, code: str) -> Optional[str]: if col.lower() == actual_col.lower(): corrected = corrected.replace(f"['{col}']", f"['{actual_col}']") corrected = corrected.replace(f".{col}", f".{actual_col}") - self.logger.info(f"Suggested correction for column name: {col} -> {actual_col}") # Add null handling for string operations if '.str.' in corrected and 'fillna' not in corrected: corrected = corrected.replace('.str.', '.fillna("").str.') - self.logger.info("Added null handling for string operations") - - # Suggest proper aggregation syntax - if any(op in corrected for op in self.valid_aggregations) and 'groupby' not in corrected: - self.logger.info("Suggested using groupby before aggregation") if corrected != code: return corrected @@ -180,7 +164,6 @@ def validate_query(self, code: str) -> Tuple[bool, List[str]]: Tuple[bool, List[str]]: (is_valid, list_of_errors) """ errors = [] - self.logger.info("Starting pandas query validation") # Run all checks errors.extend(self._check_column_existence(code)) @@ -189,39 +172,34 @@ def validate_query(self, code: str) -> Tuple[bool, List[str]]: errors.extend(self._check_aggregation_usage(code)) is_valid = len(errors) == 0 - if is_valid: - self.logger.info("Query is valid") - else: - self.logger.info("Query validation failed with errors") return is_valid, errors -def validate_pandas_query(df: pd.DataFrame, code: str, logger: logging.Logger) -> Dict: - """ - Validate a pandas query and suggest corrections if needed. - - Args: - df: Input DataFrame - code: Pandas query code to validate - - Returns: - Dictionary containing: - - 'code': Original code string - - 'is_valid': Boolean indicating if code is valid - - 'errors': List of validation errors - - 'suggested_correction': Suggested correction string or None - """ - validator = PandasQueryValidator(df, logger) - is_valid, errors = validator.validate_query(code) - suggested_correction = None - - if not is_valid: - suggested_correction = validator.suggest_corrections(code) - - return { - 'code': code.strip(), - 'is_valid': is_valid, - 'errors': errors, - 'suggested_correction': suggested_correction - } \ No newline at end of file + def validate_pandas_query(self, code: str) -> Dict: + """ + Validate a pandas query and suggest corrections if needed. + + Args: + df: Input DataFrame + code: Pandas query code to validate + + Returns: + Dictionary containing: + - 'code': Original code string + - 'is_valid': Boolean indicating if code is valid + - 'errors': List of validation errors + - 'suggested_correction': Suggested correction string or None + """ + is_valid, errors = self.validate_query(code) + suggested_correction = None + + if not is_valid: + suggested_correction = self.suggest_corrections(code) + + return { + 'code': code.strip(), + 'is_valid': is_valid, + 'errors': errors, + 'suggested_correction': suggested_correction + } \ No newline at end of file From b2d64e72257265b2ddad2f9a7dc8cc5c8093a3ef Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 13:52:07 +0000 Subject: [PATCH 6/9] pandas_query.py working with validator - before refactor --- .idea/.gitignore | 8 + examples/EC3_equations_short.csv | 38 ---- examples/customers-100.csv | 101 ++++++++++ examples/example.py | 42 ++-- src/pandas_query.py | 329 +++++++++++++++++++------------ src/pandas_validator.py | 146 ++++++-------- 6 files changed, 381 insertions(+), 283 deletions(-) create mode 100644 .idea/.gitignore delete mode 100644 examples/EC3_equations_short.csv create mode 100644 examples/customers-100.csv diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/examples/EC3_equations_short.csv b/examples/EC3_equations_short.csv deleted file mode 100644 index f3b455a..0000000 --- a/examples/EC3_equations_short.csv +++ /dev/null @@ -1,38 +0,0 @@ -,section_number,section_title,equation,variables -0,1,Section 1,"{""expression"": ""x-x \\\\text{ axis along a member}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""2cfa4e1f-744e-429d-91f8-e2efcf0c03ac"", ""description"": ""The x-x axis is the longitudinal axis along the member."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""2cfa4e1f-744e-429d-91f8-e2efcf0c03ac"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""7428aa91-6096-4981-b09f-b18de708d848"", ""description"": ""x-x axis along a member"", ""name"": ""x"", ""parent_uuid"": ""2cfa4e1f-744e-429d-91f8-e2efcf0c03ac"", ""uuid"": ""7428aa91-6096-4981-b09f-b18de708d848"", ""type"": ""variable""}]" -1,1,Section 1,"{""expression"": ""y-y \\\\text{ axis of a cross-section}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""5203bba8-9473-420f-aa9f-0470634a4135"", ""description"": ""The y-y axis is the vertical axis of a cross-section."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""5203bba8-9473-420f-aa9f-0470634a4135"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""47906c6b-56e4-482a-b92f-0ce8e76339e0"", ""description"": ""y-y axis of a cross-section"", ""name"": ""y"", ""parent_uuid"": ""5203bba8-9473-420f-aa9f-0470634a4135"", ""uuid"": ""47906c6b-56e4-482a-b92f-0ce8e76339e0"", ""type"": ""variable""}]" -2,1,Section 1,"{""expression"": ""z-z \\\\text{ axis of a cross-section}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""3d57a809-46f3-4a1b-bec1-16520beab9f9"", ""description"": ""The z-z axis is the horizontal axis of a cross-section."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""3d57a809-46f3-4a1b-bec1-16520beab9f9"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""f104cba3-5400-4456-b37f-419d81a166cd"", ""description"": ""z-z axis of a cross-section"", ""name"": ""z"", ""parent_uuid"": ""3d57a809-46f3-4a1b-bec1-16520beab9f9"", ""uuid"": ""f104cba3-5400-4456-b37f-419d81a166cd"", ""type"": ""variable""}]" -3,1,Section 1,"{""expression"": ""u-u \\\\text{ major principal axis}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""3803066a-ce4e-4b8f-abc0-a86513893e35"", ""description"": ""The u-u axis is the major principal axis, not coinciding with the y-y axis."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""3803066a-ce4e-4b8f-abc0-a86513893e35"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""8ee7fb0b-4f69-4150-a907-41da6142bac4"", ""description"": ""major principal axis"", ""name"": ""u"", ""parent_uuid"": ""3803066a-ce4e-4b8f-abc0-a86513893e35"", ""uuid"": ""8ee7fb0b-4f69-4150-a907-41da6142bac4"", ""type"": ""variable""}]" -4,1,Section 1,"{""expression"": ""v-v \\\\text{ minor principal axis}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""37a4e304-46f7-4189-97a1-c087dd6a0a7b"", ""description"": ""The v-v axis is the minor principal axis, not coinciding with the z-z axis."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""37a4e304-46f7-4189-97a1-c087dd6a0a7b"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""9b2f1377-a981-4ffd-adb0-ff36b72c4fc1"", ""description"": ""minor principal axis"", ""name"": ""v"", ""parent_uuid"": ""37a4e304-46f7-4189-97a1-c087dd6a0a7b"", ""uuid"": ""9b2f1377-a981-4ffd-adb0-ff36b72c4fc1"", ""type"": ""variable""}]" -5,1,Section 1,"{""expression"": ""b \\\\text{ width of a cross section}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""de7f9e9d-f4a2-4078-9829-fcb8a9986919"", ""description"": ""The width of the cross section."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""de7f9e9d-f4a2-4078-9829-fcb8a9986919"", ""type"": ""equation"", ""variables"": []}",[] -6,1,Section 1,"{""expression"": ""h \\\\text{ depth of a cross section}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""90374ae4-1624-4ae4-b414-74575ba976d3"", ""description"": ""The depth of the cross section."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""90374ae4-1624-4ae4-b414-74575ba976d3"", ""type"": ""equation"", ""variables"": []}",[] -7,1,Section 1,"{""expression"": ""d \\\\text{ depth of straight portion of a web}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""a19f5dbb-8d6a-49f4-af09-03f81483d271"", ""description"": ""The depth of the straight portion of a web."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""a19f5dbb-8d6a-49f4-af09-03f81483d271"", ""type"": ""equation"", ""variables"": []}",[] -8,1,Section 1,"{""expression"": ""t_{w} \\\\text{ web thickness}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""3f3a1d96-fe34-4e48-a576-1196eb095a02"", ""description"": ""The thickness of the web."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""3f3a1d96-fe34-4e48-a576-1196eb095a02"", ""type"": ""equation"", ""variables"": []}",[] -9,1,Section 1,"{""expression"": ""t_{f} \\\\text{ flange thickness}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""894aab11-f36d-481e-b286-dd506aaa6fa2"", ""description"": ""The thickness of the flange."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""894aab11-f36d-481e-b286-dd506aaa6fa2"", ""type"": ""equation"", ""variables"": []}",[] -10,1,Section 1,"{""expression"": ""r \\\\text{ radius of root fillet}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""17e80b15-0de0-480d-9d81-4c4d7f38e563"", ""description"": ""The radius of the root fillet."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""17e80b15-0de0-480d-9d81-4c4d7f38e563"", ""type"": ""equation"", ""variables"": []}",[] -11,1,Section 1,"{""expression"": ""r_{1} \\\\text{ radius of root fillet}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""9cb87802-20bd-4cba-b27b-e7a04104f2de"", ""description"": ""The radius of the root fillet (alternative notation)."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""9cb87802-20bd-4cba-b27b-e7a04104f2de"", ""type"": ""equation"", ""variables"": []}",[] -12,1,Section 1,"{""expression"": ""r_{2} \\\\text{ toe radius}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""5eee8bdf-cd2b-4d76-9be7-46b43f110930"", ""description"": ""The toe radius."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""5eee8bdf-cd2b-4d76-9be7-46b43f110930"", ""type"": ""equation"", ""variables"": []}",[] -13,1,Section 1,"{""expression"": ""t \\\\text{ thickness}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c18f50a2-522a-49f1-9ad4-f2f64fb99df3"", ""description"": ""The thickness of the member."", ""parent_uuid"": ""4efaa0c7-d405-4aa4-81f7-da41900cfb53"", ""uuid"": ""c18f50a2-522a-49f1-9ad4-f2f64fb99df3"", ""type"": ""equation"", ""variables"": []}",[] -14,1.2.1,General reference standards,"{""expression"": ""EN 1090"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""111950e8-8f12-452e-b56c-2d5297339ed8"", ""description"": ""Execution of steel structures - Technical requirements"", ""parent_uuid"": ""89c82902-67a1-4ab5-ac79-7900536281c5"", ""uuid"": ""111950e8-8f12-452e-b56c-2d5297339ed8"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] -15,1.2.1,General reference standards,"{""expression"": ""EN ISO 12944"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""86006784-ccee-49e3-a192-9aef09e923f3"", ""description"": ""Paints and varnishes - Corrosion protection of steel structures by protective paint systems"", ""parent_uuid"": ""89c82902-67a1-4ab5-ac79-7900536281c5"", ""uuid"": ""86006784-ccee-49e3-a192-9aef09e923f3"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] -16,1.2.1,General reference standards,"{""expression"": ""AC 2 EN ISO 1461 $4 AC_{2}$"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""504404b4-30e5-4189-aa8d-192411838536"", ""description"": ""Hot dip galvanized coatings on fabricated iron and steel articles - specifications and test methods"", ""parent_uuid"": ""89c82902-67a1-4ab5-ac79-7900536281c5"", ""uuid"": ""504404b4-30e5-4189-aa8d-192411838536"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] -17,1.5.6,buckling length,"{""expression"": ""L_{b} = k_{l} \\\\cdot L_{0}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c4d1dc5c-e91d-4ba2-803a-f4faea4c8f9b"", ""description"": ""The buckling length (L_{b}) is calculated as the product of the effective length factor (k_{l}) and the unbraced length (L_{0})."", ""parent_uuid"": ""07cbadb6-84ca-4684-86b3-84566c7145f4"", ""uuid"": ""c4d1dc5c-e91d-4ba2-803a-f4faea4c8f9b"", ""type"": ""equation""}","[{""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""fbac13d8-4b4f-4e79-af8f-6f30334c9a76"", ""description"": ""Effective length factor"", ""name"": ""k_{l}"", ""parent_uuid"": ""c4d1dc5c-e91d-4ba2-803a-f4faea4c8f9b"", ""uuid"": ""fbac13d8-4b4f-4e79-af8f-6f30334c9a76"", ""type"": ""variable""}, {""unit"": ""m"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""e253a218-e3bc-4915-b883-4d7443c75559"", ""description"": ""Unbraced length of the member"", ""name"": ""L_{0}"", ""parent_uuid"": ""c4d1dc5c-e91d-4ba2-803a-f4faea4c8f9b"", ""uuid"": ""e253a218-e3bc-4915-b883-4d7443c75559"", ""type"": ""variable""}]" -18,2,Section 2,"{""expression"": ""P_{k}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""d0922511-5b4c-49ed-8646-0a19206fcc22"", ""description"": ""Nominal value of the effect of prestressing imposed during erection"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""d0922511-5b4c-49ed-8646-0a19206fcc22"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""be1d4b88-50a1-4237-bb06-8f4d60a31678"", ""description"": ""Nominal value of the effect of prestressing imposed during erection"", ""name"": ""P_{k}"", ""parent_uuid"": ""d0922511-5b4c-49ed-8646-0a19206fcc22"", ""uuid"": ""be1d4b88-50a1-4237-bb06-8f4d60a31678"", ""type"": ""variable""}]" -19,2,Section 2,"{""expression"": ""G_{k}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""f199f388-8e04-4297-848d-7c564dd70795"", ""description"": ""Nominal value of the effect of permanent actions"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""f199f388-8e04-4297-848d-7c564dd70795"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""6573bd15-5a65-40c7-a8dd-b930bd57e3eb"", ""description"": ""Nominal value of the effect of permanent actions"", ""name"": ""G_{k}"", ""parent_uuid"": ""f199f388-8e04-4297-848d-7c564dd70795"", ""uuid"": ""6573bd15-5a65-40c7-a8dd-b930bd57e3eb"", ""type"": ""variable""}]" -20,2,Section 2,"{""expression"": ""X_{k}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""16ceccea-a113-417f-90be-40059d75a019"", ""description"": ""Characteristic values of material property"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""16ceccea-a113-417f-90be-40059d75a019"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""2b0a31f9-8ed5-402c-b79f-d6d139250bab"", ""description"": ""Characteristic values of material property"", ""name"": ""X_{k}"", ""parent_uuid"": ""16ceccea-a113-417f-90be-40059d75a019"", ""uuid"": ""2b0a31f9-8ed5-402c-b79f-d6d139250bab"", ""type"": ""variable""}]" -21,2,Section 2,"{""expression"": ""X_{n}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""2203cd28-9f21-465c-a83d-06a596ac4c36"", ""description"": ""Nominal values of material property"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""2203cd28-9f21-465c-a83d-06a596ac4c36"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""843548c5-6333-464b-9c4d-0a4207cb9568"", ""description"": ""Nominal values of material property"", ""name"": ""X_{n}"", ""parent_uuid"": ""2203cd28-9f21-465c-a83d-06a596ac4c36"", ""uuid"": ""843548c5-6333-464b-9c4d-0a4207cb9568"", ""type"": ""variable""}]" -22,2,Section 2,"{""expression"": ""R_{d}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""7c64c026-c967-4355-81de-3d4fbff8c99b"", ""description"": ""Design value of resistance"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""7c64c026-c967-4355-81de-3d4fbff8c99b"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""d2d0f544-2f19-4529-8e8b-cf7e7d529908"", ""description"": ""Design value of resistance"", ""name"": ""R_{d}"", ""parent_uuid"": ""7c64c026-c967-4355-81de-3d4fbff8c99b"", ""uuid"": ""d2d0f544-2f19-4529-8e8b-cf7e7d529908"", ""type"": ""variable""}]" -23,2,Section 2,"{""expression"": ""R_{k}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""de25b03d-888a-4341-b14f-80b72a0a648d"", ""description"": ""Characteristic value of resistance"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""de25b03d-888a-4341-b14f-80b72a0a648d"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""35eec3c1-56f3-47a9-9e3f-997136e039c6"", ""description"": ""Characteristic value of resistance"", ""name"": ""R_{k}"", ""parent_uuid"": ""de25b03d-888a-4341-b14f-80b72a0a648d"", ""uuid"": ""35eec3c1-56f3-47a9-9e3f-997136e039c6"", ""type"": ""variable""}]" -24,2,Section 2,"{""expression"": ""\\\\gamma_{M}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""21d9bb0f-2ca7-45b0-80ef-89e8eb7712cf"", ""description"": ""General partial factor"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""21d9bb0f-2ca7-45b0-80ef-89e8eb7712cf"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""287695b0-64ee-4b9c-babd-3814879c2870"", ""description"": ""General partial factor"", ""name"": ""\\\\gamma_{M}"", ""parent_uuid"": ""21d9bb0f-2ca7-45b0-80ef-89e8eb7712cf"", ""uuid"": ""287695b0-64ee-4b9c-babd-3814879c2870"", ""type"": ""variable""}]" -25,2,Section 2,"{""expression"": ""\\\\gamma_{Mi}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""bf7621c4-a0d8-4fd4-a4cf-08872020ca9d"", ""description"": ""Particular partial factor"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""bf7621c4-a0d8-4fd4-a4cf-08872020ca9d"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""cb625bc8-3efe-40b5-b743-7fc04540c1c0"", ""description"": ""Particular partial factor"", ""name"": ""\\\\gamma_{Mi}"", ""parent_uuid"": ""bf7621c4-a0d8-4fd4-a4cf-08872020ca9d"", ""uuid"": ""cb625bc8-3efe-40b5-b743-7fc04540c1c0"", ""type"": ""variable""}]" -26,2,Section 2,"{""expression"": ""\\\\gamma_{Mf}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""8ebcc6f2-c90f-45fb-bc01-24d793589b48"", ""description"": ""Partial factor for fatigue"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""8ebcc6f2-c90f-45fb-bc01-24d793589b48"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""f894e44e-3f98-4e1c-aee8-c3f82ce4b8ec"", ""description"": ""Partial factor for fatigue"", ""name"": ""\\\\gamma_{Mf}"", ""parent_uuid"": ""8ebcc6f2-c90f-45fb-bc01-24d793589b48"", ""uuid"": ""f894e44e-3f98-4e1c-aee8-c3f82ce4b8ec"", ""type"": ""variable""}]" -27,2,Section 2,"{""expression"": ""\\\\eta"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""87712994-dabb-4be8-b639-27d6a3e36d37"", ""description"": ""Conversion factor"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""87712994-dabb-4be8-b639-27d6a3e36d37"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""30489fe4-f5d2-4949-b46b-df0e6070b6dd"", ""description"": ""Conversion factor"", ""name"": ""\\\\eta"", ""parent_uuid"": ""87712994-dabb-4be8-b639-27d6a3e36d37"", ""uuid"": ""30489fe4-f5d2-4949-b46b-df0e6070b6dd"", ""type"": ""variable""}]" -28,2,Section 2,"{""expression"": ""a_{d}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""9a818d35-7495-4078-b886-60e1f823f5fc"", ""description"": ""Design value of geometrical data"", ""parent_uuid"": ""bcf94bf1-00b2-4e82-81ec-8758ee63acb5"", ""uuid"": ""9a818d35-7495-4078-b886-60e1f823f5fc"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c68c19e4-bfba-4c83-a85a-357eeaf45568"", ""description"": ""Design value of geometrical data"", ""name"": ""a_{d}"", ""parent_uuid"": ""9a818d35-7495-4078-b886-60e1f823f5fc"", ""uuid"": ""c68c19e4-bfba-4c83-a85a-357eeaf45568"", ""type"": ""variable""}]" -29,2.1.3.1,General,"{""expression"": ""AC_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""description"": ""General requirements for the durability of steel structures based on the type of action and design working life."", ""parent_uuid"": ""972ae989-5a40-4498-9f05-c4c433597823"", ""uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""d2a80956-68f7-4b58-a063-3d7449412c37"", ""description"": ""The type of action affecting durability."", ""name"": ""Type of action"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""d2a80956-68f7-4b58-a063-3d7449412c37"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""b7c277d6-9c44-4515-8060-f3f112026836"", ""description"": ""The intended lifespan of the structure as per EN 1990."", ""name"": ""Design working life"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""b7c277d6-9c44-4515-8060-f3f112026836"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""062734be-57a0-4d81-a8e0-7badc7a9c1c6"", ""description"": ""Methods to protect against corrosion including surface protection, weathering steel, and stainless steel."", ""name"": ""Corrosion protection methods"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""062734be-57a0-4d81-a8e0-7badc7a9c1c6"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""54e65188-6533-4932-88d9-c99b7ce42e5b"", ""description"": ""The required fatigue life of the structure as per EN 1993-1-9."", ""name"": ""Fatigue life"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""54e65188-6533-4932-88d9-c99b7ce42e5b"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""7656cd6c-c5b0-48fc-a93a-9d434dbe73b4"", ""description"": ""Design considerations for wear resistance."", ""name"": ""Wearing design"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""7656cd6c-c5b0-48fc-a93a-9d434dbe73b4"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""fea415d0-afd3-4bd1-8712-fd582c9b5af9"", ""description"": ""Design considerations for accidental actions as per EN 1991-1-7."", ""name"": ""Accidental actions"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""fea415d0-afd3-4bd1-8712-fd582c9b5af9"", ""type"": ""variable""}, {""unit"": ""None"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""a11a3a22-0751-49bd-84ba-464dfaf8bc8b"", ""description"": ""Requirements for the inspection and maintenance of the structure."", ""name"": ""Inspection and maintenance"", ""parent_uuid"": ""b5d5997f-4d99-4ddc-a1f4-6d4368f55b2d"", ""uuid"": ""a11a3a22-0751-49bd-84ba-464dfaf8bc8b"", ""type"": ""variable""}]" -30,2.1.3.2,Design working life for buildings,"{""expression"": ""A C_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""b5f95f76-4d74-4e72-8175-99c08005a8c0"", ""description"": ""The design working life shall be taken as the period for which a building structure is expected to be used for its intended purpose."", ""parent_uuid"": ""a6159013-feed-4450-97b5-bdc376f1d1a2"", ""uuid"": ""b5f95f76-4d74-4e72-8175-99c08005a8c0"", ""type"": ""equation"", ""output_variable"": ""None""}","[{""unit"": ""years"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""46b599e7-3a18-4e6b-a3e7-469368db0bb3"", ""description"": ""The expected period for which a building structure is intended to be used."", ""name"": ""Design Working Life"", ""parent_uuid"": ""b5f95f76-4d74-4e72-8175-99c08005a8c0"", ""uuid"": ""46b599e7-3a18-4e6b-a3e7-469368db0bb3"", ""type"": ""variable""}]" -31,2.1.3.3,Durability for buildings,"{""expression"": ""A_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""bcb49066-3c30-4e86-8de5-2f2e50d71d4a"", ""description"": ""Durability requirement for buildings and their components to be designed for environmental actions and fatigue or protected from them."", ""parent_uuid"": ""e0dbf84a-600c-47f2-995e-2c23fb3ad54c"", ""uuid"": ""bcb49066-3c30-4e86-8de5-2f2e50d71d4a"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] -32,2.1.3.3,Durability for buildings,"{""expression"": ""A C_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""829efb1e-e32f-411b-9fa0-7412b26aed13"", ""description"": ""Consideration of material deterioration, corrosion, or fatigue through appropriate material choice and structural redundancy."", ""parent_uuid"": ""e0dbf84a-600c-47f2-995e-2c23fb3ad54c"", ""uuid"": ""829efb1e-e32f-411b-9fa0-7412b26aed13"", ""type"": ""equation"", ""variables"": [], ""output_variable"": ""None""}",[] -33,2.2,Principles of limit state design,"{""expression"": ""\\\\sqrt{AC_{2}}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""a7a226c8-d252-431b-a970-f31e7871a98f"", ""description"": ""The resistance of cross-sections and members specified in Eurocode 3 for the ultimate limit states."", ""parent_uuid"": ""540541e0-7e44-418d-842c-c34f0f11701c"", ""uuid"": ""a7a226c8-d252-431b-a970-f31e7871a98f"", ""type"": ""equation""}","[{""unit"": ""N"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""faa42d2e-f5b6-41e4-814f-87ba2e0efdc7"", ""description"": ""A coefficient related to the material properties and design conditions"", ""name"": ""AC_{2}"", ""parent_uuid"": ""a7a226c8-d252-431b-a970-f31e7871a98f"", ""uuid"": ""faa42d2e-f5b6-41e4-814f-87ba2e0efdc7"", ""type"": ""variable""}]" -34,2.3.1,Actions and environmental influences,"{""expression"": ""P_k + \\\\sqrt{A C_{2}} G_k"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""description"": ""The total action considering permanent actions and imposed deformations during erection."", ""parent_uuid"": ""e7314dfa-02f1-45ef-afba-035d411f2b81"", ""uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""type"": ""equation""}","[{""unit"": ""N"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""851d8496-9194-4a28-a1f0-61e4866409a8"", ""description"": ""Nominal value of imposed deformations as permanent actions"", ""name"": ""P_k"", ""parent_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""uuid"": ""851d8496-9194-4a28-a1f0-61e4866409a8"", ""type"": ""variable""}, {""unit"": ""N"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""af35d11d-a3ea-4eda-b869-5d8d889bf9d4"", ""description"": ""Other permanent actions"", ""name"": ""G_k"", ""parent_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""uuid"": ""af35d11d-a3ea-4eda-b869-5d8d889bf9d4"", ""type"": ""variable""}, {""unit"": ""m^2"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""bab1002c-f685-472e-8f50-cd69ab7f3f03"", ""description"": ""Area factor related to the structure"", ""name"": ""A"", ""parent_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""uuid"": ""bab1002c-f685-472e-8f50-cd69ab7f3f03"", ""type"": ""variable""}, {""unit"": ""dimensionless"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""35395598-3a71-4043-9bfb-fe8455cc6dd2"", ""description"": ""Coefficient related to the structure"", ""name"": ""C_{2}"", ""parent_uuid"": ""c457ba08-8e48-44f0-91bb-f4ce365e59b0"", ""uuid"": ""35395598-3a71-4043-9bfb-fe8455cc6dd2"", ""type"": ""variable""}]" -35,2.4.1,Design values of material properties,"{""expression"": ""AC_{1}"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""6c9c239c-dbf1-4ab7-9fdb-be272d5814e2"", ""description"": ""Design value of material properties as indicated in Eurocode."", ""parent_uuid"": ""62f5d44b-46e6-4795-9602-f81ec6ea8c32"", ""uuid"": ""6c9c239c-dbf1-4ab7-9fdb-be272d5814e2"", ""type"": ""equation"", ""variables"": []}",[] -36,2.4.3,Design resistances,"{""expression"": ""R_{d} = \\\\frac{R_{k}}{\\\\gamma_{M}} = \\\\frac{1}{\\\\gamma_{M}} R_{k}(\\\\eta_{1} X_{k, 1} ; \\\\eta_{i} X_{k, i} ; a_{d})"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""description"": ""Design resistance for steel structures as per EN 1990."", ""parent_uuid"": ""6d9634e0-07ca-4238-b4ab-21181c06ebc1"", ""uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""type"": ""equation""}","[{""unit"": ""N"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""bb4afe6b-45d0-4092-b592-f55b903bc397"", ""description"": ""Characteristic value of the particular resistance determined with characteristic or nominal values for the material properties and dimensions"", ""name"": ""R_k"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""bb4afe6b-45d0-4092-b592-f55b903bc397"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""1bda5452-168e-4894-859c-1b4f63fe689c"", ""description"": ""Global partial factor for the particular resistance"", ""name"": ""\\\\gamma_{M}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""1bda5452-168e-4894-859c-1b4f63fe689c"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""8786a087-efc7-4dfa-b1d2-d9fcca727c47"", ""description"": ""Parameter related to the resistance"", ""name"": ""\\\\eta_{1}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""8786a087-efc7-4dfa-b1d2-d9fcca727c47"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""db075c65-76ef-4081-b689-e00fe502581d"", ""description"": ""Parameter related to the resistance"", ""name"": ""\\\\eta_{i}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""db075c65-76ef-4081-b689-e00fe502581d"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""255b88d1-b2a1-4f5c-83e4-b8947b76a30e"", ""description"": ""Characteristic value related to the first parameter"", ""name"": ""X_{k, 1}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""255b88d1-b2a1-4f5c-83e4-b8947b76a30e"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""b84b73b0-ad29-4121-ab0b-12bfc55bc54a"", ""description"": ""Characteristic value related to the i-th parameter"", ""name"": ""X_{k, i}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""b84b73b0-ad29-4121-ab0b-12bfc55bc54a"", ""type"": ""variable""}, {""unit"": ""null"", ""doc_uuid"": ""d418f507-711a-4f53-a9e6-472909def3ab"", ""chunk_uuid"": ""471bbd0b-db2c-4760-84f1-dae70d957ead"", ""description"": ""Additional parameter related to the design resistance"", ""name"": ""a_{d}"", ""parent_uuid"": ""c0ad296d-1a78-4a79-b13d-43840360e514"", ""uuid"": ""471bbd0b-db2c-4760-84f1-dae70d957ead"", ""type"": ""variable""}]" diff --git a/examples/customers-100.csv b/examples/customers-100.csv new file mode 100644 index 0000000..635248c --- /dev/null +++ b/examples/customers-100.csv @@ -0,0 +1,101 @@ +Index,Customer Id,First Name,Last Name,Company,City,Country,Phone 1,Phone 2,Email,Subscription Date,Website +1,DD37Cf93aecA6Dc,Sheryl,Baxter,Rasmussen Group,East Leonard,Chile,229.077.5154,397.884.0519x718,zunigavanessa@smith.info,2020-08-24,http://www.stephenson.com/ +2,1Ef7b82A4CAAD10,Preston,Lozano,Vega-Gentry,East Jimmychester,Djibouti,5153435776,686-620-1820x944,vmata@colon.com,2021-04-23,http://www.hobbs.com/ +3,6F94879bDAfE5a6,Roy,Berry,Murillo-Perry,Isabelborough,Antigua and Barbuda,+1-539-402-0259,(496)978-3969x58947,beckycarr@hogan.com,2020-03-25,http://www.lawrence.com/ +4,5Cef8BFA16c5e3c,Linda,Olsen,"Dominguez, Mcmillan and Donovan",Bensonview,Dominican Republic,001-808-617-6467x12895,+1-813-324-8756,stanleyblackwell@benson.org,2020-06-02,http://www.good-lyons.com/ +5,053d585Ab6b3159,Joanna,Bender,"Martin, Lang and Andrade",West Priscilla,Slovakia (Slovak Republic),001-234-203-0635x76146,001-199-446-3860x3486,colinalvarado@miles.net,2021-04-17,https://goodwin-ingram.com/ +6,2d08FB17EE273F4,Aimee,Downs,Steele Group,Chavezborough,Bosnia and Herzegovina,(283)437-3886x88321,999-728-1637,louis27@gilbert.com,2020-02-25,http://www.berger.net/ +7,EA4d384DfDbBf77,Darren,Peck,"Lester, Woodard and Mitchell",Lake Ana,Pitcairn Islands,(496)452-6181x3291,+1-247-266-0963x4995,tgates@cantrell.com,2021-08-24,https://www.le.com/ +8,0e04AFde9f225dE,Brett,Mullen,"Sanford, Davenport and Giles",Kimport,Bulgaria,001-583-352-7197x297,001-333-145-0369,asnow@colon.com,2021-04-12,https://hammond-ramsey.com/ +9,C2dE4dEEc489ae0,Sheryl,Meyers,Browning-Simon,Robersonstad,Cyprus,854-138-4911x5772,+1-448-910-2276x729,mariokhan@ryan-pope.org,2020-01-13,https://www.bullock.net/ +10,8C2811a503C7c5a,Michelle,Gallagher,Beck-Hendrix,Elaineberg,Timor-Leste,739.218.2516x459,001-054-401-0347x617,mdyer@escobar.net,2021-11-08,https://arias.com/ +11,216E205d6eBb815,Carl,Schroeder,"Oconnell, Meza and Everett",Shannonville,Guernsey,637-854-0256x825,114.336.0784x788,kirksalas@webb.com,2021-10-20,https://simmons-hurley.com/ +12,CEDec94deE6d69B,Jenna,Dodson,"Hoffman, Reed and Mcclain",East Andrea,Vietnam,(041)737-3846,+1-556-888-3485x42608,mark42@robbins.com,2020-11-29,http://www.douglas.net/ +13,e35426EbDEceaFF,Tracey,Mata,Graham-Francis,South Joannamouth,Togo,001-949-844-8787,(855)713-8773,alex56@walls.org,2021-12-02,http://www.beck.com/ +14,A08A8aF8BE9FaD4,Kristine,Cox,Carpenter-Cook,Jodyberg,Sri Lanka,786-284-3358x62152,+1-315-627-1796x8074,holdenmiranda@clarke.com,2021-02-08,https://www.brandt.com/ +15,6fEaA1b7cab7B6C,Faith,Lutz,Carter-Hancock,Burchbury,Singapore,(781)861-7180x8306,207-185-3665,cassieparrish@blevins-chapman.net,2022-01-26,http://stevenson.org/ +16,8cad0b4CBceaeec,Miranda,Beasley,Singleton and Sons,Desireeshire,Oman,540.085.3135x185,+1-600-462-6432x21881,vduncan@parks-hardy.com,2022-04-12,http://acosta.org/ +17,a5DC21AE3a21eaA,Caroline,Foley,Winters-Mendoza,West Adriennestad,Western Sahara,936.222.4746x9924,001-469-948-6341x359,holtgwendolyn@watson-davenport.com,2021-03-10,http://www.benson-roth.com/ +18,F8Aa9d6DfcBeeF8,Greg,Mata,Valentine LLC,Lake Leslie,Mozambique,(701)087-2415,(195)156-1861x26241,jaredjuarez@carroll.org,2022-03-26,http://pitts-cherry.com/ +19,F160f5Db3EfE973,Clifford,Jacobson,Simon LLC,Harmonview,South Georgia and the South Sandwich Islands,001-151-330-3524x0469,(748)477-7174,joseph26@jacobson.com,2020-09-24,https://mcconnell.com/ +20,0F60FF3DdCd7aB0,Joanna,Kirk,Mays-Mccormick,Jamesshire,French Polynesia,(266)131-7001x711,(283)312-5579x11543,tuckerangie@salazar.net,2021-09-24,https://www.camacho.net/ +21,9F9AdB7B8A6f7F2,Maxwell,Frye,Patterson Inc,East Carly,Malta,423.262.3059,202-880-0688x7491,fgibson@drake-webb.com,2022-01-12,http://www.roberts.com/ +22,FBd0Ded4F02a742,Kiara,Houston,"Manning, Hester and Arroyo",South Alvin,Netherlands,001-274-040-3582x10611,+1-528-175-0973x4684,blanchardbob@wallace-shannon.com,2020-09-15,https://www.reid-potts.com/ +23,2FB0FAA1d429421,Colleen,Howard,Greer and Sons,Brittanyview,Paraguay,1935085151,(947)115-7711x5488,rsingleton@ryan-cherry.com,2020-08-19,http://paul.biz/ +24,010468dAA11382c,Janet,Valenzuela,Watts-Donaldson,Veronicamouth,Lao People's Democratic Republic,354.259.5062x7538,500.433.2022,stefanie71@spence.com,2020-09-08,https://moreno.biz/ +25,eC1927Ca84E033e,Shane,Wilcox,Tucker LLC,Bryanville,Albania,(429)005-9030x11004,541-116-4501,mariah88@santos.com,2021-04-06,https://www.ramos.com/ +26,09D7D7C8Fe09aea,Marcus,Moody,Giles Ltd,Kaitlyntown,Panama,674-677-8623,909-277-5485x566,donnamullins@norris-barrett.org,2022-05-24,https://www.curry.com/ +27,aBdfcF2c50b0bfD,Dakota,Poole,Simmons Group,Michealshire,Belarus,(371)987-8576x4720,071-152-1376,stacey67@fields.org,2022-02-20,https://sanford-wilcox.biz/ +28,b92EBfdF8a3f0E6,Frederick,Harper,"Hinton, Chaney and Stokes",South Marissatown,Switzerland,+1-077-121-1558x0687,264.742.7149,jacobkhan@bright.biz,2022-05-26,https://callahan.org/ +29,3B5dAAFA41AFa22,Stefanie,Fitzpatrick,Santana-Duran,Acevedoville,Saint Vincent and the Grenadines,(752)776-3286,+1-472-021-4814x85074,wterrell@clark.com,2020-07-30,https://meyers.com/ +30,EDA69ca7a6e96a2,Kent,Bradshaw,Sawyer PLC,North Harold,Tanzania,+1-472-143-5037x884,126.922.6153,qjimenez@boyd.com,2020-04-26,http://maynard-ho.com/ +31,64DCcDFaB9DFd4e,Jack,Tate,"Acosta, Petersen and Morrow",West Samuel,Zimbabwe,965-108-4406x20714,046.906.1442x6784,gfigueroa@boone-zavala.com,2021-09-15,http://www.hawkins-ramsey.com/ +32,679c6c83DD872d6,Tom,Trujillo,Mcgee Group,Cunninghamborough,Denmark,416-338-3758,(775)890-7209,tapiagreg@beard.info,2022-01-13,http://www.daniels-klein.com/ +33,7Ce381e4Afa4ba9,Gabriel,Mejia,Adkins-Salinas,Port Annatown,Liechtenstein,4077245425,646.044.0696x66800,coleolson@jennings.net,2021-04-24,https://patel-hanson.info/ +34,A09AEc6E3bF70eE,Kaitlyn,Santana,Herrera Group,New Kaitlyn,United States of America,6303643286,447-710-6202x07313,georgeross@miles.org,2021-09-21,http://pham.com/ +35,aA9BAFfBc3710fe,Faith,Moon,"Waters, Chase and Aguilar",West Marthaburgh,Bahamas,+1-586-217-0359x6317,+1-818-199-1403,willistonya@randolph-baker.com,2021-11-03,https://spencer-charles.info/ +36,E11dfb2DB8C9f72,Tammie,Haley,"Palmer, Barnes and Houston",East Teresa,Belize,001-276-734-4113x6087,(430)300-8770,harrisisaiah@jenkins.com,2022-01-04,http://evans-simon.com/ +37,889eCf90f68c5Da,Nicholas,Sosa,Jordan Ltd,South Hunter,Uruguay,(661)425-6042,975-998-1519,fwolfe@dorsey.com,2021-08-10,https://www.fleming-richards.com/ +38,7a1Ee69F4fF4B4D,Jordan,Gay,Glover and Sons,South Walter,Solomon Islands,7208417020,8035336772,tiffanydavies@harris-mcfarland.org,2021-02-24,http://www.lee.org/ +39,dca4f1D0A0fc5c9,Bruce,Esparza,Huerta-Mclean,Poolefurt,Montenegro,559-529-4424,001-625-000-7132x0367,preese@frye-vega.com,2021-10-22,http://www.farley.org/ +40,17aD8e2dB3df03D,Sherry,Garza,Anderson Ltd,West John,Poland,001-067-713-6440x158,(978)289-8785x5766,ann48@miller.com,2021-11-01,http://spence.com/ +41,2f79Cd309624Abb,Natalie,Gentry,Monroe PLC,West Darius,Dominican Republic,830.996.8238,499.122.5415,tcummings@fitzpatrick-ashley.com,2020-10-10,http://www.dorsey.biz/ +42,6e5ad5a5e2bB5Ca,Bryan,Dunn,Kaufman and Sons,North Jimstad,Burkina Faso,001-710-802-5565,078.699.8982x13881,woodwardandres@phelps.com,2021-09-08,http://www.butler.com/ +43,7E441b6B228DBcA,Wayne,Simpson,Perkins-Trevino,East Rebekahborough,Bolivia,(344)156-8632x1869,463-445-3702x38463,barbarapittman@holder.com,2020-12-13,https://gillespie-holder.com/ +44,D3fC11A9C235Dc6,Luis,Greer,Cross PLC,North Drew,Bulgaria,001-336-025-6849x701,684.698.2911x6092,bstuart@williamson-mcclure.com,2022-05-15,https://fletcher-nielsen.com/ +45,30Dfa48fe5Ede78,Rhonda,Frost,"Herrera, Shepherd and Underwood",Lake Lindaburgh,Monaco,(127)081-9339,+1-431-028-3337x3492,zkrueger@wolf-chavez.net,2021-12-06,http://www.khan.com/ +46,fD780ED8dbEae7B,Joanne,Montes,"Price, Sexton and Mcdaniel",Gwendolynview,Palau,(897)726-7952,(467)886-9467x5721,juan80@henson.net,2020-07-01,http://ochoa.com/ +47,300A40d3ce24bBA,Geoffrey,Guzman,Short-Wiggins,Zimmermanland,Uzbekistan,975.235.8921x269,(983)188-6873,bauercrystal@gay.com,2020-04-23,https://decker-kline.com/ +48,283DFCD0Dba40aF,Gloria,Mccall,"Brennan, Acosta and Ramos",North Kerriton,Ghana,445-603-6729,001-395-959-4736x4524,bartlettjenna@zuniga-moss.biz,2022-03-11,http://burgess-frank.com/ +49,F4Fc91fEAEad286,Brady,Cohen,Osborne-Erickson,North Eileenville,United Arab Emirates,741.849.0139x524,+1-028-691-7497x0894,mccalltyrone@durham-rose.biz,2022-03-10,http://hammond-barron.com/ +50,80F33Fd2AcebF05,Latoya,Mccann,"Hobbs, Garrett and Sanford",Port Sergiofort,Belarus,(530)287-4548x29481,162-234-0249x32790,bobhammond@barry.biz,2021-12-02,https://www.burton.com/ +51,Aa20BDe68eAb0e9,Gerald,Hawkins,"Phelps, Forbes and Koch",New Alberttown,Canada,+1-323-239-1456x96168,(092)508-0269,uwarner@steele-arias.com,2021-03-19,https://valenzuela.com/ +52,e898eEB1B9FE22b,Samuel,Crawford,"May, Goodwin and Martin",South Jasmine,Algeria,802-242-7457,626.116.9535x8578,xpittman@ritter-carney.net,2021-03-27,https://guerrero.org/ +53,faCEF517ae7D8eB,Patricia,Goodwin,"Christian, Winters and Ellis",Cowanfort,Swaziland,322.549.7139x70040,(111)741-4173,vaughanchristy@lara.biz,2021-03-08,http://clark.info/ +54,c09952De6Cda8aA,Stacie,Richard,Byrd Inc,New Deborah,Madagascar,001-622-948-3641x24810,001-731-168-2893x8891,clinton85@colon-arias.org,2020-10-15,https://kim.com/ +55,f3BEf3Be028166f,Robin,West,"Nixon, Blackwell and Sosa",Wallstown,Ecuador,698.303.4267,001-683-837-7651x525,greenemiranda@zimmerman.com,2022-01-13,https://www.mora.com/ +56,C6F2Fc6a7948a4e,Ralph,Haas,Montes PLC,Lake Ellenchester,Palestinian Territory,2239271999,001-962-434-0867x649,goodmancesar@figueroa.biz,2020-05-25,http://may.com/ +57,c8FE57cBBdCDcb2,Phyllis,Maldonado,Costa PLC,Lake Whitney,Saint Barthelemy,4500370767,001-508-064-6725x017,yhanson@warner-diaz.org,2021-01-25,http://www.bernard.com/ +58,B5acdFC982124F2,Danny,Parrish,Novak LLC,East Jaredbury,United Arab Emirates,(669)384-8597x8794,506.731.5952x571,howelldarren@house-cohen.com,2021-03-17,http://www.parsons-hudson.com/ +59,8c7DdF10798bCC3,Kathy,Hill,"Moore, Mccoy and Glass",Selenabury,South Georgia and the South Sandwich Islands,001-171-716-2175x310,888.625.0654,ncamacho@boone-simmons.org,2020-11-15,http://hayden.com/ +60,C681dDd0cc422f7,Kelli,Hardy,Petty Ltd,Huangfort,Sao Tome and Principe,020.324.2191x2022,424-157-8216,kristopher62@oliver.com,2020-12-20,http://www.kidd.com/ +61,a940cE42e035F28,Lynn,Pham,"Brennan, Camacho and Tapia",East Pennyshire,Portugal,846.468.6834x611,001-248-691-0006,mpham@rios-guzman.com,2020-08-21,https://www.murphy.com/ +62,9Cf5E6AFE0aeBfd,Shelley,Harris,"Prince, Malone and Pugh",Port Jasminborough,Togo,423.098.0315x8373,+1-386-458-8944x15194,zachary96@mitchell-bryant.org,2020-12-10,https://www.ryan.com/ +63,aEcbe5365BbC67D,Eddie,Jimenez,Caldwell Group,West Kristine,Ethiopia,+1-235-657-1073x6306,(026)401-7353x2417,kristiwhitney@bernard.com,2022-03-24,http://cherry.com/ +64,FCBdfCEAe20A8Dc,Chloe,Hutchinson,Simon LLC,South Julia,Netherlands,981-544-9452,+1-288-552-4666x060,leah85@sutton-terrell.com,2022-05-15,https://mitchell.info/ +65,636cBF0835E10ff,Eileen,Lynch,"Knight, Abbott and Hubbard",Helenborough,Liberia,+1-158-951-4131x53578,001-673-779-6713x680,levigiles@vincent.com,2021-01-02,http://mckay.com/ +66,fF1b6c9E8Fbf1ff,Fernando,Lambert,Church-Banks,Lake Nancy,Lithuania,497.829.9038,3863743398,fisherlinda@schaefer.net,2021-04-23,https://www.vang.com/ +67,2A13F74EAa7DA6c,Makayla,Cannon,Henderson Inc,Georgeport,New Caledonia,001-215-801-6392x46009,027-609-6460,scottcurtis@hurley.biz,2020-01-20,http://www.velazquez.net/ +68,a014Ec1b9FccC1E,Tom,Alvarado,Donaldson-Dougherty,South Sophiaberg,Kiribati,(585)606-2980x2258,730-797-3594x5614,nicholsonnina@montgomery.info,2020-08-18,http://odom-massey.com/ +69,421a109cABDf5fa,Virginia,Dudley,Warren Ltd,Hartbury,French Southern Territories,027.846.3705x14184,+1-439-171-1846x4636,zvalencia@phelps.com,2021-01-31,http://hunter-esparza.com/ +70,CC68FD1D3Bbbf22,Riley,Good,Wade PLC,Erikaville,Canada,6977745822,855-436-7641,alex06@galloway.com,2020-02-03,http://conway.org/ +71,CBCd2Ac8E3eBDF9,Alexandria,Buck,Keller-Coffey,Nicolasfort,Iran,078-900-4760x76668,414-112-8700x68751,lee48@manning.com,2021-02-20,https://ramsey.org/ +72,Ef859092FbEcC07,Richard,Roth,Conway-Mcbride,New Jasmineshire,Morocco,581-440-6539,9857827463,aharper@maddox-townsend.org,2020-02-23,https://www.brooks.com/ +73,F560f2d3cDFb618,Candice,Keller,Huynh and Sons,East Summerstad,Zimbabwe,001-927-965-8550x92406,001-243-038-4271x53076,buckleycory@odonnell.net,2020-08-22,https://www.lucero.com/ +74,A3F76Be153Df4a3,Anita,Benson,Parrish Ltd,Skinnerport,Russian Federation,874.617.5668x69878,(399)820-6418x0071,angie04@oconnell.com,2020-02-09,http://oconnor.com/ +75,D01Af0AF7cBbFeA,Regina,Stein,Guzman-Brown,Raystad,Solomon Islands,001-469-848-0724x4407,001-085-360-4426x00357,zrosario@rojas-hardin.net,2022-01-15,http://www.johnston.info/ +76,d40e89dCade7b2F,Debra,Riddle,"Chang, Aguirre and Leblanc",Colinhaven,United States Virgin Islands,+1-768-182-6014x14336,(303)961-4491,shieldskerry@robles.com,2020-07-11,http://kaiser.info/ +77,BF6a1f9bd1bf8DE,Brittany,Zuniga,Mason-Hester,West Reginald,Kyrgyz Republic,(050)136-9025,001-480-851-2496x0157,mchandler@cochran-huerta.org,2021-07-24,http://www.boyle.com/ +78,FfaeFFbbbf280db,Cassidy,Mcmahon,"Mcguire, Huynh and Hopkins",Lake Sherryborough,Myanmar,5040771311,684-682-0021x1326,katrinalane@fitzgerald.com,2020-10-21,https://hurst.com/ +79,CbAE1d1e9a8dCb1,Laurie,Pennington,"Sanchez, Marsh and Hale",Port Katherineville,Dominica,007.155.3406x553,+1-809-862-5566x277,cookejill@powell.com,2020-06-08,http://www.hebert.com/ +80,A7F85c1DE4dB87f,Alejandro,Blair,"Combs, Waller and Durham",Thomasland,Iceland,(690)068-4641x51468,555.509.8691x2329,elizabethbarr@ewing.com,2020-09-19,https://mercado-blevins.com/ +81,D6CEAfb3BDbaa1A,Leslie,Jennings,Blankenship-Arias,Coreybury,Micronesia,629.198.6346,075.256.0829,corey75@wiggins.com,2021-11-13,https://www.juarez.com/ +82,Ebdb6F6F7c90b69,Kathleen,Mckay,"Coffey, Lamb and Johnson",Lake Janiceton,Saint Vincent and the Grenadines,(733)910-9968,(691)247-4128x0665,chloelester@higgins-wilkinson.com,2021-09-12,http://www.owens-mooney.com/ +83,E8E7e8Cfe516ef0,Hunter,Moreno,Fitzpatrick-Lawrence,East Clinton,Isle of Man,(733)833-6754,001-761-013-7121,isaac26@benton-finley.com,2020-12-28,http://walls.info/ +84,78C06E9b6B3DF20,Chad,Davidson,Garcia-Jimenez,South Joshuashire,Oman,8275702958,(804)842-4715,justinwalters@jimenez.com,2021-11-15,http://www.garner-oliver.com/ +85,03A1E62ADdeb31c,Corey,Holt,"Mcdonald, Bird and Ramirez",New Glenda,Fiji,001-439-242-4986x7918,3162708934,maurice46@morgan.com,2020-02-18,http://www.watson.com/ +86,C6763c99d0bd16D,Emma,Cunningham,Stephens Inc,North Jillianview,New Zealand,128-059-0206x60217,(312)164-4545x2284,walter83@juarez.org,2022-05-13,http://www.reid.info/ +87,ebe77E5Bf9476CE,Duane,Woods,Montoya-Miller,Lyonsberg,Maldives,(636)544-7783x7288,(203)287-1003x5932,kmercer@wagner.com,2020-07-21,http://murray.org/ +88,E4Bbcd8AD81fC5f,Alison,Vargas,"Vaughn, Watts and Leach",East Cristinabury,Benin,365-273-8144,053-308-7653x6287,vcantu@norton.com,2020-11-10,http://mason.info/ +89,efeb73245CDf1fF,Vernon,Kane,Carter-Strickland,Thomasfurt,Yemen,114-854-1159x555,499-608-4612,hilljesse@barrett.info,2021-04-15,http://www.duffy-hensley.net/ +90,37Ec4B395641c1E,Lori,Flowers,Decker-Mcknight,North Joeburgh,Namibia,679.415.1210,945-842-3659x4581,tyrone77@valenzuela.info,2021-01-09,http://www.deleon-crosby.com/ +91,5ef6d3eefdD43bE,Nina,Chavez,Byrd-Campbell,Cassidychester,Bhutan,053-344-3205,+1-330-920-5422x571,elliserica@frank.com,2020-03-26,https://www.pugh.com/ +92,98b3aeDcC3B9FF3,Shane,Foley,Rocha-Hart,South Dannymouth,Hungary,+1-822-569-0302,001-626-114-5844x55073,nsteele@sparks.com,2021-07-06,https://www.holt-sparks.com/ +93,aAb6AFc7AfD0fF3,Collin,Ayers,Lamb-Peterson,South Lonnie,Anguilla,404-645-5351x012,001-257-582-8850x8516,dudleyemily@gonzales.biz,2021-06-29,http://www.ruiz.com/ +94,54B5B5Fe9F1B6C5,Sherry,Young,"Lee, Lucero and Johnson",Frankchester,Solomon Islands,158-687-1764,(438)375-6207x003,alan79@gates-mclaughlin.com,2021-04-04,https://travis.net/ +95,BE91A0bdcA49Bbc,Darrell,Douglas,"Newton, Petersen and Mathis",Daisyborough,Mali,001-084-845-9524x1777,001-769-564-6303,grayjean@lowery-good.com,2022-02-17,https://banks.biz/ +96,cb8E23e48d22Eae,Karl,Greer,Carey LLC,East Richard,Guyana,(188)169-1674x58692,001-841-293-3519x614,hhart@jensen.com,2022-01-30,http://hayes-perez.com/ +97,CeD220bdAaCfaDf,Lynn,Atkinson,"Ware, Burns and Oneal",New Bradview,Sri Lanka,+1-846-706-2218,605.413.3198,vkemp@ferrell.com,2021-07-10,https://novak-allison.com/ +98,28CDbC0dFe4b1Db,Fred,Guerra,Schmitt-Jones,Ortegaland,Solomon Islands,+1-753-067-8419x7170,+1-632-666-7507x92121,swagner@kane.org,2021-09-18,https://www.ross.com/ +99,c23d1D9EE8DEB0A,Yvonne,Farmer,Fitzgerald-Harrell,Lake Elijahview,Aruba,(530)311-9786,001-869-452-0943x12424,mccarthystephen@horn-green.biz,2021-08-11,http://watkins.info/ +100,2354a0E336A91A1,Clarence,Haynes,"Le, Nash and Cross",Judymouth,Honduras,(753)813-6941,783.639.1472,colleen91@faulkner.biz,2020-03-11,http://www.hatfield-saunders.net/ diff --git a/examples/example.py b/examples/example.py index 03457e4..373c224 100644 --- a/examples/example.py +++ b/examples/example.py @@ -1,32 +1,26 @@ -# examples/example.py -import os import pandas as pd -import sys -from pathlib import Path - -# Add the project root to the Python path -project_root = Path(__file__).resolve().parent.parent -sys.path.append(str(project_root)) - from src.pandas_query import PandasQuery -# Data -data = [ - ('John Doe', 25, 50), - ('Jane Smith', 38, 70), - ('Alex Johnson', 45, 80), - ('Jessica Brown', 60, 40), - ('Michael Davis', 22, 90), -] -df = pd.DataFrame(data, columns=['name', 'age', 'donation']) +# Create sample DataFrame +df = pd.read_csv("customers-100.csv") # Create query executor -querier = PandasQuery() +querier = PandasQuery(validate=True) # Execute query -query = "What is the average donation of people older than 40 who donated more than $50?" -result = querier.execute(df, query) +try: + result = querier.execute(df, "Get a table of all customers who have a first name beginning with 'D'?") + + # Get complete results as a dictionary + result_dict = result.model_dump() # Pydantic v2 syntax (or use .dict() for v1) + print("\nComplete results:") + import json + print(json.dumps(result_dict, indent=2)) + + # df of results + print('\nHere is a table of the output results:\n') + df_result = pd.DataFrame(result.result) + print(df_result) -print(f"Query: {query}") -print(f"Generated code: {querier.last_code}") -print(f"Result: {result}") \ No newline at end of file +except Exception as e: + print(f"Error executing query: {str(e)}") \ No newline at end of file diff --git a/src/pandas_query.py b/src/pandas_query.py index 7346e6d..e69fb4a 100644 --- a/src/pandas_query.py +++ b/src/pandas_query.py @@ -1,17 +1,84 @@ -from openai import OpenAI + import pandas as pd import numpy as np -from typing import Any, Optional, Dict -import os from RestrictedPython import compile_restricted from RestrictedPython.Guards import safe_builtins, guarded_iter_unpack_sequence from RestrictedPython.Eval import default_guarded_getattr, default_guarded_getitem, default_guarded_getiter +from openai import OpenAI +import os +from typing import Dict, Any, Optional, Union, List +from pydantic import BaseModel, Field, validator from .pandas_validator import PandasQueryValidator -class PandasQuery: - """A streamlined class for executing natural language queries on pandas DataFrames using OpenAI's LLM.""" +class QueryResult(BaseModel): + """Pydantic model for query execution results.""" + query: str = Field(..., description="Original query string") + code: str = Field(..., description="Generated pandas code") + is_valid: bool = Field(..., description="Whether the query is valid") + errors: List[str] = Field(default_factory=list, description="List of validation/execution errors") + result: Optional[Any] = Field(None, description="Query execution result") + + class Config: + arbitrary_types_allowed = True + json_encoders = { + pd.DataFrame: lambda df: df.to_dict(orient='records'), + pd.Series: lambda s: s.to_dict(), + np.ndarray: lambda arr: arr.tolist(), + np.int64: lambda x: int(x), + np.float64: lambda x: float(x) + } + + def _serialize_value(self, v: Any) -> Any: + """Helper method to serialize values.""" + if isinstance(v, pd.DataFrame): + return v.to_dict(orient='records') + elif isinstance(v, pd.Series): + return v.to_dict() + elif isinstance(v, np.ndarray): + return v.tolist() + elif isinstance(v, (np.int64, np.float64)): + return float(v) + elif isinstance(v, dict): + return {k: self._serialize_value(v) for k, v in v.items()} + elif isinstance(v, list): + return [self._serialize_value(item) for item in v] + return v + + def model_dump(self, **kwargs) -> Dict[str, Any]: + """Override model_dump to ensure all values are serializable.""" + data = { + 'query': self.query, + 'code': self.code, + 'is_valid': self.is_valid, + 'errors': self.errors, + 'result': self._serialize_value(self.result), + } + return data + + def get_results(self) -> Dict[str, Any]: + """Get a simplified dictionary of just the key results.""" + return { + 'valid': self.is_valid, + 'result': self._serialize_value(self.result) if self.is_valid else None, + 'errors': self.errors if not self.is_valid else [], + } + + @validator('result', pre=True) + def validate_result(cls, v): + """Convert pandas/numpy results to native Python types.""" + if isinstance(v, pd.DataFrame): + return v.to_dict(orient='records') + elif isinstance(v, pd.Series): + return v.to_dict() + elif isinstance(v, np.ndarray): + return v.tolist() + elif isinstance(v, (np.int64, np.float64)): + return float(v) + return v + +class PandasQuery: def __init__( self, model: str = "gpt-4", @@ -22,11 +89,71 @@ def __init__( self.client = OpenAI(api_key=api_key or os.getenv("OPENAI_API_KEY")) self.model = model self.temperature = temperature - self.last_code = None self.validate = validate + self.restricted_globals = self._setup_restricted_globals() + + def execute(self, df: pd.DataFrame, query: str) -> QueryResult: + """Execute a natural language query with validation and return comprehensive results.""" + import time + + # Initialize result with Pydantic model + query_result = QueryResult( + query=query, + code="", + is_valid=False, + errors=[], + result=None, + ) + + try: + # Get code from LLM + response = self.client.chat.completions.create( + model=self.model, + temperature=self.temperature, + messages=[ + {"role": "user", "content": self._build_prompt(df, query)} + ] + ) + + code = response.choices[0].message.content.strip() + code = self._clean_code(code) + query_result.code = code + + # Validate if required + if self.validate: + validator = PandasQueryValidator(df) + validation_result = validator.get_validation_result(code) + + if not validation_result['is_valid']: + query_result.errors = validation_result['errors'] + return query_result + + # Use suggested correction if available + if validation_result['suggested_correction']: + code = validation_result['suggested_correction'] + query_result.code = code + + # Execute if valid + result = self._execute_in_sandbox(code, df) + + query_result.is_valid = True + query_result.result = result + + except Exception as e: + query_result.errors.append(f"Execution error: {str(e)}") + + return query_result + + def _setup_restricted_globals(self) -> Dict: + """Set up restricted globals for sandbox execution.""" + # Core pandas Series methods + series_methods = [ + "sum", "mean", "any", "argmax", "argmin", "count", + "diff", "dropna", "fillna", "head", "max", "min", + "sort_values", "unique", "isna", "astype" + ] - # Set up sandbox environment - self.restricted_globals = { + restricted_globals = { "__builtins__": dict(safe_builtins), "pd": pd, "np": np, @@ -36,144 +163,86 @@ def __init__( "_iter_unpack_sequence_": guarded_iter_unpack_sequence, } - # Core pandas Series methods (excluding string accessor methods) - self.series_methods = [ - "sum", "mean", "any", "argmax", "argmin", "count", "cumsum", - "diff", "dropna", "fillna", "head", "idxmax", "idxmin", - "max", "min", "notna", "prod", "quantile", "rename", "round", - "tail", "to_frame", "to_list", "to_numpy", "unique", - "sort_index", "sort_values", "aggregate", "isna", "astype" - ] - - # Add series methods to restricted globals - self.restricted_globals.update({ - method: getattr(pd.Series, method) for method in self.series_methods + # Add series methods + restricted_globals.update({ + method: getattr(pd.Series, method) for method in series_methods }) - def _build_prompt(self, df: pd.DataFrame, query: str, n: int = 5) -> str: - """Build a detailed prompt with DataFrame information and query context.""" - # Get detailed column information - column_info = [] - for col in df.columns: - dtype = df[col].dtype - null_count = df[col].isna().sum() - unique_count = df[col].nunique() - - # Get appropriate sample values and range info - sample_vals = df[col].sample(min(n, df[col].count())) - if pd.api.types.is_numeric_dtype(dtype): - try: - range_info = f"Range: {df[col].min()} to {df[col].max()}" - except: - range_info = f"Sample values: {list(sample_vals)}" - else: - range_info = f"Sample values: {list(sample_vals)}" - - column_info.append( - f"- {col} ({dtype}):\n" - f" * {range_info}\n" - f" * Null values: {null_count}\n" - f" * Unique values: {unique_count}" - ) - - prompt = f"""Given a pandas DataFrame with {len(df)} rows and the following columns: + return restricted_globals -{chr(10).join(column_info)} - -Write a single line of Python code that answers this question: + def _build_prompt(self, df: pd.DataFrame, query: str) -> str: + """Build a detailed prompt with DataFrame information and query context.""" + # Convert DataFrame info to dictionary for better LLM interpretation + df_info = { + "metadata": { + "rows": len(df), + "columns": len(df.columns) + }, + "columns": {} + } -{query} + for col in df.columns: + df_info["columns"][col] = { + "dtype": str(df[col].dtype), + "null_count": int(df[col].isna().sum()), + "unique_count": int(df[col].nunique()), + "sample_values": df[col].dropna().sample(min(3, len(df))).tolist() + } + if pd.api.types.is_numeric_dtype(df[col].dtype): + df_info["columns"][col].update({ + "min": float(df[col].min()) if not pd.isna(df[col].min()) else None, + "max": float(df[col].max()) if not pd.isna(df[col].max()) else None + }) -Guidelines: -1. Basic Requirements: - - Use only pandas and numpy operations - - Assign the result to a variable named 'result' - - Return only the code, no explanations + prompt = f"""Given a pandas DataFrame with the following structure: +``` +{df_info} +``` -2. Type-Specific Operations: - - For numeric operations on string numbers: Use pd.to_numeric(df['column'], errors='coerce') - - For string comparisons: Use .fillna('').str.lower() - - For string pattern matching: Use .str.contains() or .str.startswith() - - For datetime comparisons: Use .dt accessor +Write a single line of Python code that answers this question: {query} -3. Null Handling: - - Always handle null values before operations - - Use fillna() for string operations - - Use dropna() or fillna() for numeric operations +Requirements: +1. Assign result to 'result' variable +2. Handle null values appropriately +3. Use type-safe operations (pd.to_numeric for string numbers) +4. Use proper string operations (.str) and datetime (.dt) accessors +5. Return only the code, no explanations -4. Available Methods: - Core methods: {', '.join(self.series_methods)} - String operations available via .str accessor - DateTime operations available via .dt accessor +Available methods: {', '.join(self.restricted_globals.keys())}""" -Example patterns: -- String to number comparison: result = df[pd.to_numeric(df['column'], errors='coerce') > 5] -- Case-insensitive search: result = df[df['column'].fillna('').str.lower().str.contains('pattern')] -- Section number filtering: result = df[df['section_number'].fillna('').str.startswith('6')] -""" return prompt - def _execute_in_sandbox(self, code: str, df: pd.DataFrame) -> Any: - """Execute code in RestrictedPython sandbox with validation.""" - try: - # Pre-execution validation - if self.validate: - validator = PandasQueryValidator(df) - validation_result = validator.validate_pandas_query(code) - if not validation_result['is_valid']: - for error in validation_result['errors']: - print(f"Warning: {error}") - if validation_result['suggested_correction']: - print("Using suggested correction") - code = validation_result['suggested_correction'] - - # Compile the code in restricted mode - byte_code = compile_restricted( - source=code, - filename='', - mode='exec' - ) - - # Create local namespace with DataFrame and numeric conversion function - local_vars = { - 'df': df, - 'result': None, - 'pd': pd - } - - # Execute in sandbox - exec(byte_code, self.restricted_globals, local_vars) - - result = local_vars['result'] - if result is None: - raise ValueError("Execution produced no result") - return result - except Exception as e: - error_msg = f"Sandbox execution failed. Code: {code}. Error: {str(e)}" - raise RuntimeError(error_msg) - - def execute(self, df: pd.DataFrame, query: str) -> Any: - """Execute a natural language query with validation.""" - response = self.client.chat.completions.create( - model=self.model, - temperature=self.temperature, - messages=[ - {"role": "user", "content": self._build_prompt(df, query)} - ] - ) - - code = response.choices[0].message.content.strip() - - # Clean up code + @staticmethod + def _clean_code(code: str) -> str: + """Clean up code from LLM response.""" if code.startswith("```"): code = code.split("\n", 1)[1].rsplit("\n", 1)[0] if code.startswith("python"): code = code.split("\n", 1)[1] - code = code.strip("` \n") + return code.strip("` \n") + + def _execute_in_sandbox(self, code: str, df: pd.DataFrame) -> Any: + """Execute code in RestrictedPython sandbox.""" + byte_code = compile_restricted( + source=code, + filename='', + mode='exec' + ) + + local_vars = {'df': df, 'result': None, 'pd': pd} + exec(byte_code, self.restricted_globals, local_vars) + + if local_vars['result'] is None: + raise ValueError("Execution produced no result") - self.last_code = code + return local_vars['result'] - # Execute in sandbox - return self._execute_in_sandbox(code, df) \ No newline at end of file + @staticmethod + def _extract_column_references(code: str) -> set[str]: + """Extract column references from code.""" + import re + pattern = r"df[\['](\w+)[\]']|df\.(\w+)" + matches = re.findall(pattern, code) + return {match[0] or match[1] for match in matches} \ No newline at end of file diff --git a/src/pandas_validator.py b/src/pandas_validator.py index 1900d2d..d8f0f68 100644 --- a/src/pandas_validator.py +++ b/src/pandas_validator.py @@ -4,7 +4,6 @@ from typing import Dict, List, Set, Tuple, Optional - class PandasQueryValidator: """Validates pandas query operations and provides suggestions for corrections.""" @@ -13,76 +12,65 @@ def __init__(self, df: pd.DataFrame): self.dtypes = df.dtypes.to_dict() self.columns = set(df.columns) - # Valid pandas operations by data type + # Valid pandas operations by data type - simplified to most common operations self.valid_operations = { 'object': { - 'string_ops': {'contains', 'startswith', 'endswith', 'lower', 'upper', 'strip', 'len'}, + 'string_ops': {'contains', 'startswith', 'endswith'}, 'comparisons': {'==', '!=', 'isin'} }, 'number': { - 'numeric_ops': {'sum', 'mean', 'min', 'max', 'count', 'median'}, + 'numeric_ops': {'sum', 'mean', 'min', 'max'}, 'comparisons': {'>', '<', '>=', '<=', '==', '!='} }, 'datetime': { - 'date_ops': {'year', 'month', 'day', 'hour', 'minute'}, + 'date_ops': {'year', 'month', 'day'}, 'comparisons': {'>', '<', '>=', '<=', '==', '!='} }, 'bool': { - 'bool_ops': {'any', 'all'}, 'comparisons': {'==', '!='} } } - # Common pandas aggregation functions - self.valid_aggregations = { - 'sum', 'mean', 'median', 'min', 'max', 'count', - 'std', 'var', 'first', 'last' - } - - # Valid pandas commands and their requirements - self.valid_commands = { - 'groupby': {'columns'}, - 'agg': {'groupby'}, - 'sort_values': {'columns'}, - 'fillna': {'value'}, - 'dropna': set(), - 'reset_index': set(), - 'merge': {'right', 'on', 'how'}, - 'join': {'on'}, - 'head': set(), - 'tail': set() + # Common pandas aggregation functions that require groupby + self.group_required_aggs = { + 'sum', 'mean', 'median', 'min', 'max', 'count' } - - def _extract_column_references(self, code: str) -> List[str]: + def _extract_column_references(self, code: str) -> set[str]: """Extract column references from the code.""" + import re # Match patterns like df['column'] or df.column pattern = r"df[\['](\w+)[\]']|df\.(\w+)" matches = re.findall(pattern, code) # Flatten and filter matches - columns = {match[0] or match[1] for match in matches} - return list(columns) + return {match[0] or match[1] for match in matches} - def _extract_operations(self, code: str) -> List[str]: + def _extract_operations(self, code: str) -> list[str]: """Extract pandas operations from the code.""" + import re # Match method calls on df or column references pattern = r'\.(\w+)\(' - operations = re.findall(pattern, code) - return operations + return re.findall(pattern, code) - def _check_column_existence(self, code: str) -> List[str]: + def _check_column_existence(self, code: str) -> list[str]: """Check if all referenced columns exist in the DataFrame.""" errors = [] referenced_columns = self._extract_column_references(code) for col in referenced_columns: if col not in self.columns: + similar_cols = [ + existing_col for existing_col in self.columns + if existing_col.lower() == col.lower() + ] error_msg = f"Column '{col}' does not exist in DataFrame" + if similar_cols: + error_msg += f". Did you mean '{similar_cols[0]}'?" errors.append(error_msg) return errors - def _check_operation_compatibility(self, code: str) -> List[str]: + def _check_operation_compatibility(self, code: str) -> list[str]: """Check if operations are compatible with column data types.""" errors = [] operations = self._extract_operations(code) @@ -93,43 +81,38 @@ def _check_operation_compatibility(self, code: str) -> List[str]: continue dtype = self.dtypes[col] - dtype_category = 'number' if pd.api.types.is_numeric_dtype(dtype) else \ - 'datetime' if pd.api.types.is_datetime64_dtype(dtype) else \ - 'bool' if pd.api.types.is_bool_dtype(dtype) else 'object' + dtype_category = ( + 'number' if pd.api.types.is_numeric_dtype(dtype) + else 'datetime' if pd.api.types.is_datetime64_dtype(dtype) + else 'bool' if pd.api.types.is_bool_dtype(dtype) + else 'object' + ) - valid_ops = set() if dtype_category in self.valid_operations: - for ops in self.valid_operations[dtype_category].values(): - valid_ops.update(ops) - - for op in operations: - if op not in valid_ops and op not in self.valid_commands: - error_msg = f"Operation '{op}' may not be compatible with column '{col}' of type {dtype}" - errors.append(error_msg) - - return errors - - def _check_null_handling(self, code: str) -> List[str]: - """Check for proper null value handling.""" - errors = [] - - # Check for string operations without null handling - if any(op in code for op in ['.str.', '.dt.']): - if 'fillna' not in code and 'dropna' not in code: - error_msg = "String or datetime operations detected without null handling" - errors.append(error_msg) + valid_ops = set().union( + *self.valid_operations[dtype_category].values() + ) + + for op in operations: + if op not in valid_ops and op not in self.group_required_aggs: + error_msg = ( + f"Operation '{op}' may not be compatible with " + f"column '{col}' of type {dtype}" + ) + errors.append(error_msg) return errors - def _check_aggregation_usage(self, code: str) -> List[str]: + def _check_aggregation_usage(self, code: str) -> list[str]: """Check for valid aggregation function usage.""" errors = [] operations = self._extract_operations(code) for op in operations: - if op.lower() in self.valid_aggregations: - # Check if groupby is used before aggregation - if 'groupby' not in code and not any(c in code for c in ['sum()', 'mean()', 'count()']): + if op in self.group_required_aggs: + if 'groupby' not in code and not any( + c in code for c in ['sum()', 'mean()', 'count()'] + ): error_msg = f"Aggregation '{op}' used without groupby" errors.append(error_msg) @@ -137,7 +120,6 @@ def _check_aggregation_usage(self, code: str) -> List[str]: def suggest_corrections(self, code: str) -> Optional[str]: """Attempt to suggest corrections for common issues.""" - corrected = code # Fix column name case sensitivity @@ -145,8 +127,12 @@ def suggest_corrections(self, code: str) -> Optional[str]: if col not in self.columns: for actual_col in self.columns: if col.lower() == actual_col.lower(): - corrected = corrected.replace(f"['{col}']", f"['{actual_col}']") - corrected = corrected.replace(f".{col}", f".{actual_col}") + corrected = corrected.replace( + f"['{col}']", f"['{actual_col}']" + ) + corrected = corrected.replace( + f".{col}", f".{actual_col}" + ) # Add null handling for string operations if '.str.' in corrected and 'fillna' not in corrected: @@ -156,41 +142,19 @@ def suggest_corrections(self, code: str) -> Optional[str]: return corrected return None - def validate_query(self, code: str) -> Tuple[bool, List[str]]: - """ - Validate a pandas query code. - - Returns: - Tuple[bool, List[str]]: (is_valid, list_of_errors) - """ + def validate_query(self, code: str) -> tuple[bool, list[str]]: + """Validate a pandas query code.""" errors = [] - # Run all checks + # Run all essential checks errors.extend(self._check_column_existence(code)) errors.extend(self._check_operation_compatibility(code)) - errors.extend(self._check_null_handling(code)) errors.extend(self._check_aggregation_usage(code)) - is_valid = len(errors) == 0 - - return is_valid, errors - - - def validate_pandas_query(self, code: str) -> Dict: - """ - Validate a pandas query and suggest corrections if needed. - - Args: - df: Input DataFrame - code: Pandas query code to validate + return len(errors) == 0, errors - Returns: - Dictionary containing: - - 'code': Original code string - - 'is_valid': Boolean indicating if code is valid - - 'errors': List of validation errors - - 'suggested_correction': Suggested correction string or None - """ + def get_validation_result(self, code: str) -> Dict: + """Get comprehensive validation results.""" is_valid, errors = self.validate_query(code) suggested_correction = None From ca1f6774839eb67bb99fa200e7dd5519a43bc624 Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 15:44:56 +0000 Subject: [PATCH 7/9] improvement to prompt and validator --- examples/example.py | 8 +-- src/pandas_query.py | 71 +++++++++++++++++++----- src/pandas_validator.py | 116 +++++++++++++++++++++++++--------------- 3 files changed, 135 insertions(+), 60 deletions(-) diff --git a/examples/example.py b/examples/example.py index 373c224..371a4ab 100644 --- a/examples/example.py +++ b/examples/example.py @@ -1,3 +1,4 @@ +import json import pandas as pd from src.pandas_query import PandasQuery @@ -5,16 +6,15 @@ df = pd.read_csv("customers-100.csv") # Create query executor -querier = PandasQuery(validate=True) +querier = PandasQuery(validate=True, temperature=0.2) # Execute query try: - result = querier.execute(df, "Get a table of all customers who have a first name beginning with 'D'?") + result = querier.execute(df, "Get a table of all customers who have a first name beginning with 'D' and who live in a city with exactly two e's in it?") # Get complete results as a dictionary - result_dict = result.model_dump() # Pydantic v2 syntax (or use .dict() for v1) + result_dict = result.model_dump() print("\nComplete results:") - import json print(json.dumps(result_dict, indent=2)) # df of results diff --git a/src/pandas_query.py b/src/pandas_query.py index e69fb4a..f740742 100644 --- a/src/pandas_query.py +++ b/src/pandas_query.py @@ -195,20 +195,63 @@ def _build_prompt(self, df: pd.DataFrame, query: str) -> str: }) prompt = f"""Given a pandas DataFrame with the following structure: -``` -{df_info} -``` - -Write a single line of Python code that answers this question: {query} - -Requirements: -1. Assign result to 'result' variable -2. Handle null values appropriately -3. Use type-safe operations (pd.to_numeric for string numbers) -4. Use proper string operations (.str) and datetime (.dt) accessors -5. Return only the code, no explanations - -Available methods: {', '.join(self.restricted_globals.keys())}""" + ``` + {df_info} + ``` + + Write a single line of Python code that answers this question: {query} + + Requirements: + 1. Assign result to 'result' variable + 2. Handle null values appropriately: + - For string operations: Use .fillna('') before .str operations + - For numeric operations: Use .fillna(0) or .dropna() as appropriate + - For boolean operations: Use .fillna(False) + + 3. String Operations Guidelines: + - Use .str accessor for string operations + - For case-insensitive matching: Use .str.lower() + - For counting: Use .str.count(pattern) + - For starts/ends with: Use .str.startswith() or .str.endswith() + - For contains: Use .str.contains(pattern, case=True/False) + - Always handle null values before string operations + + 4. Numeric Operations Guidelines: + - For string-to-numeric conversion: Use pd.to_numeric(df['column'], errors='coerce') + - For aggregations (sum, mean, etc.), only use with groupby + - For comparisons, use standard operators (>, <, >=, <=, ==, !=) + + 5. Date Operations Guidelines: + - Use .dt accessor for datetime operations + - Common attributes: .dt.year, .dt.month, .dt.day + - For date comparisons, use standard operators + + 6. Filtering Guidelines: + - Use boolean indexing: df[condition] + - For multiple conditions, use & (and) and | (or) with parentheses + - Example: df[(condition1) & (condition2)] + + 7. Return Guidelines: + - Return only the matching rows unless aggregation is specifically requested + - Do not include explanatory comments in the code + - Keep to a single line of code + + Available String Operations: + - Basic: contains, startswith, endswith, lower, upper, strip + - Count/Match: count, match, extract, find, findall + - Transform: replace, pad, center, slice, split + + Available Numeric Operations: + - Comparisons: >, <, >=, <=, ==, != + - Aggregations (with groupby only): sum, mean, median, min, max, count + + Example Patterns: + - String search: df[df['column'].fillna('').str.contains('pattern')] + - Multiple conditions: df[(df['col1'] > 0) & (df['col2'].str.startswith('prefix'))] + - Numeric filtering: df[pd.to_numeric(df['column'], errors='coerce') > value] + - Case-insensitive: df[df['column'].fillna('').str.lower().str.contains('pattern')] + + Return only the code, no explanations.""" return prompt diff --git a/src/pandas_validator.py b/src/pandas_validator.py index d8f0f68..9e23df0 100644 --- a/src/pandas_validator.py +++ b/src/pandas_validator.py @@ -15,7 +15,11 @@ def __init__(self, df: pd.DataFrame): # Valid pandas operations by data type - simplified to most common operations self.valid_operations = { 'object': { - 'string_ops': {'contains', 'startswith', 'endswith'}, + 'string_ops': { + 'contains', 'startswith', 'endswith', 'count', # Added count explicitly + 'lower', 'upper', 'strip', 'len', 'slice', 'extract', + 'find', 'findall', 'replace', 'pad', 'center', 'split' + }, 'comparisons': {'==', '!=', 'isin'} }, 'number': { @@ -32,41 +36,41 @@ def __init__(self, df: pd.DataFrame): } # Common pandas aggregation functions that require groupby + # Removed 'count' from here since it's also a string operation self.group_required_aggs = { - 'sum', 'mean', 'median', 'min', 'max', 'count' + 'sum', 'mean', 'median', 'min', 'max' } - def _extract_column_references(self, code: str) -> set[str]: - """Extract column references from the code.""" - import re - # Match patterns like df['column'] or df.column - pattern = r"df[\['](\w+)[\]']|df\.(\w+)" - matches = re.findall(pattern, code) - # Flatten and filter matches - return {match[0] or match[1] for match in matches} - def _extract_operations(self, code: str) -> list[str]: - """Extract pandas operations from the code.""" + + def _extract_operations(self, code: str) -> Dict[str, List[str]]: + """Extract pandas operations from the code, categorizing them by type.""" import re - # Match method calls on df or column references - pattern = r'\.(\w+)\(' - return re.findall(pattern, code) - def _check_column_existence(self, code: str) -> list[str]: - """Check if all referenced columns exist in the DataFrame.""" + # Match string operations specifically + str_pattern = r'\.str\.(\w+)' + str_ops = re.findall(str_pattern, code) + + # Match other operations, excluding string operations + other_pattern = r'(? list[str]: + """Check for valid aggregation function usage.""" errors = [] - referenced_columns = self._extract_column_references(code) + operations = self._extract_operations(code) - for col in referenced_columns: - if col not in self.columns: - similar_cols = [ - existing_col for existing_col in self.columns - if existing_col.lower() == col.lower() - ] - error_msg = f"Column '{col}' does not exist in DataFrame" - if similar_cols: - error_msg += f". Did you mean '{similar_cols[0]}'?" - errors.append(error_msg) + # Check only non-string operations for aggregation requirements + for op in operations['other_ops']: + if op in self.group_required_aggs: # count is no longer here + if 'groupby' not in code: + error_msg = f"Aggregation '{op}' used without groupby" + errors.append(error_msg) return errors @@ -88,36 +92,64 @@ def _check_operation_compatibility(self, code: str) -> list[str]: else 'object' ) + # Check string operations + if operations['string_ops']: + if dtype_category != 'object': + errors.append( + f"String operations used on non-string column '{col}' " + f"of type {dtype}" + ) + else: + for op in operations['string_ops']: + if op not in self.valid_operations['object']['string_ops']: + errors.append( + f"String operation '{op}' may not be valid for " + f"column '{col}'" + ) + + # Check other operations if dtype_category in self.valid_operations: valid_ops = set().union( *self.valid_operations[dtype_category].values() ) - - for op in operations: + for op in operations['other_ops']: if op not in valid_ops and op not in self.group_required_aggs: - error_msg = ( + errors.append( f"Operation '{op}' may not be compatible with " f"column '{col}' of type {dtype}" ) - errors.append(error_msg) return errors - def _check_aggregation_usage(self, code: str) -> list[str]: - """Check for valid aggregation function usage.""" + def _extract_column_references(self, code: str) -> set[str]: + """Extract column references from the code.""" + import re + # Match patterns like df['column'] or df.column + pattern = r"df[\['](\w+)[\]']|df\.(\w+)" + matches = re.findall(pattern, code) + # Flatten and filter matches + return {match[0] or match[1] for match in matches} + + + def _check_column_existence(self, code: str) -> list[str]: + """Check if all referenced columns exist in the DataFrame.""" errors = [] - operations = self._extract_operations(code) + referenced_columns = self._extract_column_references(code) - for op in operations: - if op in self.group_required_aggs: - if 'groupby' not in code and not any( - c in code for c in ['sum()', 'mean()', 'count()'] - ): - error_msg = f"Aggregation '{op}' used without groupby" - errors.append(error_msg) + for col in referenced_columns: + if col not in self.columns: + similar_cols = [ + existing_col for existing_col in self.columns + if existing_col.lower() == col.lower() + ] + error_msg = f"Column '{col}' does not exist in DataFrame" + if similar_cols: + error_msg += f". Did you mean '{similar_cols[0]}'?" + errors.append(error_msg) return errors + def suggest_corrections(self, code: str) -> Optional[str]: """Attempt to suggest corrections for common issues.""" corrected = code From 31486cf70d2b2af8c8b16262ad9a9229c42c35b3 Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 16:30:37 +0000 Subject: [PATCH 8/9] before merge to main --- examples/example.py | 57 ++++++++++++++++++++++++++++++++++++++--- requirements.txt | 23 +++-------------- src/pandas_query.py | 4 +-- src/pandas_validator.py | 5 ++-- 4 files changed, 61 insertions(+), 28 deletions(-) diff --git a/examples/example.py b/examples/example.py index 371a4ab..ba55d18 100644 --- a/examples/example.py +++ b/examples/example.py @@ -6,11 +6,14 @@ df = pd.read_csv("customers-100.csv") # Create query executor -querier = PandasQuery(validate=True, temperature=0.2) +querier = PandasQuery(validate=True, temperature=0) # Execute query try: - result = querier.execute(df, "Get a table of all customers who have a first name beginning with 'D' and who live in a city with exactly two e's in it?") + # query = "Get a table of all customers who have a first name beginning with 'D' and who live in a city with exactly two e's in it?" + # query = "Get a subtable of people who live in Panama" + query = "Get a subtable of people whos surname backwards is: 'nosdodn' or 'atam'" + result = querier.execute(df, query) # Get complete results as a dictionary result_dict = result.model_dump() @@ -23,4 +26,52 @@ print(df_result) except Exception as e: - print(f"Error executing query: {str(e)}") \ No newline at end of file + print(f"Error executing query: {str(e)}") + +""" +Complete results: +{ + "query": "Get a subtable of people whos surname backwards is: 'nosdodn' or 'atam'", + "code": "result = df[df['Last Name'].fillna('').str[::-1].str.lower().isin(['nosdodn', 'atam'])]", + "is_valid": true, + "errors": [], + "result": [ + { + "Index": 13, + "Customer Id": "e35426EbDEceaFF", + "First Name": "Tracey", + "Last Name": "Mata", + "Company": "Graham-Francis", + "City": "South Joannamouth", + "Country": "Togo", + "Phone 1": "001-949-844-8787", + "Phone 2": "(855)713-8773", + "Email": "alex56@walls.org", + "Subscription Date": "2021-12-02", + "Website": "http://www.beck.com/" + }, + { + "Index": 18, + "Customer Id": "F8Aa9d6DfcBeeF8", + "First Name": "Greg", + "Last Name": "Mata", + "Company": "Valentine LLC", + "City": "Lake Leslie", + "Country": "Mozambique", + "Phone 1": "(701)087-2415", + "Phone 2": "(195)156-1861x26241", + "Email": "jaredjuarez@carroll.org", + "Subscription Date": "2022-03-26", + "Website": "http://pitts-cherry.com/" + } + ] +} + +Here is a table of the output results: + + Index Customer Id ... Subscription Date Website +12 13 e35426EbDEceaFF ... 2021-12-02 http://www.beck.com/ +17 18 F8Aa9d6DfcBeeF8 ... 2022-03-26 http://pitts-cherry.com/ + +[2 rows x 12 columns] +""" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index cc53efc..e157541 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,4 @@ -aiohttp -aiosignal -async-timeout -attrs -certifi -charset-normalizer -frozenlist -idna -multidict -numpy -openai -pandas -python-dateutil -pytz -requests RestrictedPython -six -tqdm -tzdata -urllib3 -yarl +pandas +openai +pydantic \ No newline at end of file diff --git a/src/pandas_query.py b/src/pandas_query.py index f740742..617619c 100644 --- a/src/pandas_query.py +++ b/src/pandas_query.py @@ -1,4 +1,4 @@ - +# src/pandas_query.py import pandas as pd import numpy as np from RestrictedPython import compile_restricted @@ -6,7 +6,7 @@ from RestrictedPython.Eval import default_guarded_getattr, default_guarded_getitem, default_guarded_getiter from openai import OpenAI import os -from typing import Dict, Any, Optional, Union, List +from typing import Dict, Any, Optional, List from pydantic import BaseModel, Field, validator from .pandas_validator import PandasQueryValidator diff --git a/src/pandas_validator.py b/src/pandas_validator.py index 9e23df0..c06298e 100644 --- a/src/pandas_validator.py +++ b/src/pandas_validator.py @@ -1,7 +1,6 @@ -import re +# src/pandas_validator.py import pandas as pd -import numpy as np -from typing import Dict, List, Set, Tuple, Optional +from typing import Dict, List, Optional class PandasQueryValidator: From ce3f21dc7389dce21645de1cd7f881e7cde94bc0 Mon Sep 17 00:00:00 2001 From: CivilEngineerUK Date: Thu, 21 Nov 2024 16:38:22 +0000 Subject: [PATCH 9/9] readme update --- README.md | 129 +++++++++++++++++++++++++++++------------------------- 1 file changed, 70 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 725f971..03e5635 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,20 @@ # pandas-LLM ## Introduction -pandas-llm is a lightweight Python library that extends pandas to allow querying datasets using OpenAI prompts. This powerful tool leverages the natural language processing capabilities of OpenAI to offer intuitive, language-based querying of your Pandas dataframes. +pandas-llm is a lightweight Python library that extends pandas to allow querying datasets using OpenAI prompts. This powerful tool leverages the natural language processing capabilities of OpenAI to offer intuitive, language-based querying of your Pandas dataframes with built-in validation and safety features. ## Key Features -- **Natural Language Querying**: With pandas-llm, you can execute complex Pandas queries using natural language prompts. Instead of writing code, you can express your query in plain language and obtain the desired results. +- **Natural Language Querying**: Execute complex Pandas queries using natural language prompts. Instead of writing code, express your query in plain language and obtain the desired results. -- **Data Privacy**: Your data is not sent on the Internet. Pandas-LLM works locally with your data and uses openAI to create the query based on the dataframe columns and data types, not its content. +- **Data Privacy**: Your data stays local. Pandas-LLM works with your data locally and uses OpenAI to create queries based on dataframe metadata (columns and data types), not its content. -- **Seamless Integration**: The library seamlessly integrates with your existing Pandas workflow. You can continue using normal Pandas functions and syntax while leveraging the added capability of natural language queries. +- **Query Validation**: Built-in validation ensures generated queries are safe and compatible with your data types, preventing common errors and ensuring reliable results. -- **Efficiency and Performance**: pandas-LLM is designed to deliver efficient and performant querying capabilities. It uses OpenAI's language model to process queries quickly and accurately, providing rapid insights from your data. +- **Safe Execution**: Uses RestrictedPython for sandboxed execution of generated queries, providing an additional layer of security. -- **Flexible and Expressive**: Whether you need to filter, aggregate, sort, or transform your data, pandas-LLM allows you to express your requirements flexibly and expressively. You can perform complex operations on your dataframes with ease using human-readable language. +- **Serializable Results**: Results are automatically converted to JSON-serializable formats, making it easy to store or transmit query results. -- **Intelligent Results**: The library returns the results of your queries in a concise and understandable format. You can extract valuable insights from your data without complex code or manual analysis. - -With pandas-llm, you can unlock the power of natural language querying and effortlessly execute complex pandas queries. Let the library handle the intricacies of data manipulation while you focus on gaining insights and making data-driven decisions. +- **Type-Safe Operations**: Intelligent handling of different data types including strings, numbers, dates, and boolean values with appropriate null value handling. ## Installation @@ -26,77 +24,90 @@ Install pandas-llm using pip: pip install pandas-llm ``` -## Features -- Query pandas dataframes using natural language prompts. -- Leverage the power of OpenAI's language models in your data analysis. -- Seamless integration with existing pandas functions. - ## Usage -Here's a quick [example](https://github.com/DashyDashOrg/pandas-llm/blob/main/pandas_llm/example.py) of how to use pandas-llm: +Here's a basic example of how to use pandas-llm: ```python -import os +import json import pandas as pd -from src import PandasLLM - -# Data -# Please note that these names, ages, and donations are randomly generated -# and do not correspond to real individuals or their donations. -data = [('John Doe', 25, 50), - ('Jane Smith', 38, 70), - ('Alex Johnson', 45, 80), - ('Jessica Brown', 60, 40), - ('Michael Davis', 22, 90), - ('Emily Wilson', 30, 60), - ('Daniel Taylor', 35, 75), - ('Sophia Moore', 40, 85), - ('David Thomas', 50, 65), - ('Olivia Jackson', 29, 55)] -df = pd.DataFrame(data, columns=['name', 'age', 'donation']) - -conv_df = PandasLLM(data=df, llm_api_key=os.environ.get("OPENAI_API_KEY")) -result = conv_df.prompt("What is the average donation of people older than 40 who donated more than $50?") -code = conv_df.code_block - -print(f"Executing the following expression of type {type(result)}:\n{code}\n\nResult is:\n {result}\n") -# Executing the following expression of type : -# result = df.loc[(df['age'] > 40) & (df['donation'] > 50), 'donation'].mean() - -# Result is: -# 72.5 +from src.pandas_query import PandasQuery -``` +# Create sample DataFrame +df = pd.read_csv("customers-100.csv") -There is also a chatbot available in the repository using the same dataset. -Look at [Chatbot example](https://github.com/DashyDashOrg/pandas-llm/blob/main/pandas_llm/example-chatbot.py) +# Create query executor +querier = PandasQuery(validate=True, temperature=0) -## PandasLLM Class Constructor +# Execute query +try: + # query = "Get a table of all customers who have a first name beginning with 'D' and who live in a city with exactly two e's in it?" + # query = "Get a subtable of people who live in Panama" + query = "Get a subtable of people whos surname backwards is: 'nosdodn' or 'atam'" + result = querier.execute(df, query) -The constructor for the PandasLLM class has been enhanced in this release to provide more flexibility and control over the language model interaction. The constructor accepts the following arguments: + # Get complete results as a dictionary + result_dict = result.model_dump() + print("\nComplete results:") + print(json.dumps(result_dict, indent=2)) -**data** (mandatory): The data to be used. It can be a Pandas DataFrame, a list of lists, tuples, dictionaries, a dictionary, a string, or a list. + # df of results + print('\nHere is a table of the output results:\n') + df_result = pd.DataFrame(result.result) + print(df_result) -**llm_engine** (optional): The name of the LLM engine to use. Currently, only OpenAI is supported. Defaults to "openai". +except Exception as e: + print(f"Error executing query: {str(e)}") +``` -**llm_params** (optional): A dictionary of parameters to be used with the OpenAI API. This allows customization of the LLM behavior. Defaults to model=gpt-3.5-turbo and temperature=0.2. +## Query Result Structure +The library returns a QueryResult object with the following attributes: -**prompt_override** (optional): A boolean that determines whether or not the prompt is overridden. If set to True, the custom prompt becomes the main prompt. Defaults to False. +```python +{ + "query": str, # Original natural language query + "code": str, # Generated pandas code + "is_valid": bool, # Whether the query passed validation + "errors": List[str], # Any validation or execution errors + "result": Any # Query results (automatically serialized) +} +``` -**custom_prompt** (optional): A string that can be provided if prompt_override is False. The custom prompt will be added to the default pandas_llm prompt. Defaults to an empty string. +## Supported Operations +The library supports a wide range of pandas operations: -**path** (optional): The path to the file where the debug data will be saved. If not specified, debug data files will not be generated. +### String Operations +- Basic: contains, startswith, endswith, lower, upper, strip +- Count/Match: count, match, extract, find, findall +- Transform: replace, pad, center, slice, split -**verbose** (optional): A boolean determines whether debugging information will be printed. If set to True, additional debugging info will be displayed. Defaults to False. +### Numeric Operations +- Comparisons: >, <, >=, <=, ==, != +- Aggregations (with groupby): sum, mean, median, min, max, count -**data_privacy** (optional): A boolean determines whether the data is treated as private. If set to True, the function will not send the data content to OpenAI. Defaults to True. +### Date Operations +- Attributes: year, month, day +- Comparisons: >, <, >=, <=, ==, != -**llm_api_key** (optional): The OpenAI API key to be used. The library will attempt to use the default API key configured if not provided. +### Advanced Features +- Automatic null handling appropriate to data type +- Type-safe operations with proper conversions +- Multi-condition filtering with proper parentheses +- Case-sensitive and case-insensitive string operations -**force_sandbox** (optional): A boolean determining the fallback behaviour if the sandbox environment fails. If set to False and the sandbox fails, the library will retry using eval, which is less safe. Defaults to False. +## Configuration +The PandasQuery constructor accepts the following parameters: +```python +PandasQuery( + model: str = "gpt-4", # OpenAI model to use + temperature: float = 0.2, # Temperature for query generation + api_key: Optional[str] = None, # OpenAI API key + validate: bool = True # Enable/disable query validation +) +``` ## Contributing Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. Please make sure to update tests as appropriate. ## License -MIT +MIT \ No newline at end of file