Skip to content

Commit add8286

Browse files
committed
maggot fixes, improvements, os_command expression, and more
1 parent 6550b0e commit add8286

14 files changed

+228
-42
lines changed

symai/backend/engine_embedding.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def __init__(self):
1717
openai.api_key = config['EMBEDDING_ENGINE_API_KEY']
1818
self.model = config['EMBEDDING_ENGINE_MODEL']
1919
self.pricing = self.api_pricing()
20+
self.max_tokens = self.api_max_tokens()
2021

2122
def command(self, wrp_params):
2223
super().command(wrp_params)

symai/backend/engine_file.py

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,47 @@ class FileEngine(Engine):
1010
def __init__(self):
1111
super().__init__()
1212

13+
def reset_eof_of_pdf_return_stream(self, pdf_stream_in: list):
    """Trim a PDF's raw lines so that they end at the last ``%%EOF`` marker.

    Args:
        pdf_stream_in: The PDF content as a list of ``bytes`` lines (for
            example the result of ``readlines()`` on a file opened in
            binary mode).

    Returns:
        A new list with every line up to and including the last line that
        contains ``b'%%EOF'``. If no marker is present, a copy of the whole
        input is returned.
    """
    total = len(pdf_stream_in)
    actual_line = total  # Predefined value in case EOF is not found
    # Scan from the end; reversed() iterates lazily instead of building a
    # reversed copy of the whole file as the slice [::-1] would.
    for i, x in enumerate(reversed(pdf_stream_in)):
        if b'%%EOF' in x:
            actual_line = total - i
            print(f'EOF found at line position {-i} = actual {actual_line}, with value {x}')
            break

    # Return the list up to (and including) the marker line.
    return pdf_stream_in[:actual_line]
24+
25+
def fix_pdf(self, file_path: str):
    """Write an EOF-trimmed copy of a PDF and open that copy with PyPDF2.

    The file is read as raw byte lines, everything after the last
    ``%%EOF`` marker is dropped via ``reset_eof_of_pdf_return_stream``,
    and the result is written next to the original as
    ``<file_path>_fixed.pdf``.

    Args:
        file_path: Path of the PDF to repair.

    Returns:
        A ``PyPDF2.PdfReader`` opened on the newly written file.
    """
    # NOTE(review): forward() passes this return value to PyPDF2.PdfReader
    # a second time -- confirm PdfReader accepts a reader as its stream.
    with open(file_path, 'rb') as source:
        raw_lines = source.readlines()

    # Drop whatever trails the final EOF marker.
    trimmed_lines = self.reset_eof_of_pdf_return_stream(raw_lines)

    # Persist the repaired content alongside the original file.
    new_file_path = f'{file_path}_fixed.pdf'
    with open(new_file_path, 'wb') as target:
        target.writelines(trimmed_lines)

    return PyPDF2.PdfReader(new_file_path)
40+
41+
def read_text(self, pdf_reader, range_):
    """Concatenate the extracted text of the selected pages of a PDF.

    Args:
        pdf_reader: Object exposing an indexable ``pages`` sequence whose
            items provide ``extract_text()``.
        range_: ``None`` to read every page, otherwise a ``slice`` applied
            to the page indices ``0 .. n_pages - 1``.

    Returns:
        The text of the selected pages joined in page order; an empty
        string when no page is selected.
    """
    n_pages = len(pdf_reader.pages)
    # One index sequence serves both cases, so the page loop exists once.
    indices = range(n_pages) if range_ is None else range(n_pages)[range_]
    # join() avoids re-copying the accumulated text for every page.
    return ''.join(pdf_reader.pages[i].extract_text() for i in indices)
53+
1354
def forward(self, *args, **kwargs) -> List[str]:
1455
path = kwargs['prompt']
1556
input_handler = kwargs['input_handler'] if 'input_handler' in kwargs else None
@@ -22,22 +63,30 @@ def forward(self, *args, **kwargs) -> List[str]:
2263
if isinstance(range_, tuple) or isinstance(range_, list):
2364
range_ = slice(*range_)
2465

25-
if 'pdf' in path:
66+
if '.pdf' in path:
2667
rsp = ''
27-
with open(str(path), 'rb') as f:
28-
# creating a pdf reader object
29-
pdf_reader = PyPDF2.PdfReader(f)
30-
n_pages = len(pdf_reader.pages)
31-
if range_ is None:
32-
for i in range(n_pages):
33-
page = pdf_reader.pages[i]
34-
rsp += page.extract_text()
35-
else:
36-
for i in range(n_pages)[range_]:
37-
page = pdf_reader.pages[i]
38-
rsp += page.extract_text()
68+
try:
69+
with open(str(path), 'rb') as f:
70+
# creating a pdf reader object
71+
pdf_reader = PyPDF2.PdfReader(f)
72+
rsp = self.read_text(pdf_reader, range_)
73+
except Exception as e:
74+
print(f'Error reading PDF: {e} | {path}')
75+
if 'fix_pdf' not in kwargs or not kwargs['fix_pdf']:
76+
raise e
77+
fixed_pdf = self.fix_pdf(str(path))
78+
pdf_reader_fixed = PyPDF2.PdfReader(fixed_pdf)
79+
rsp = self.read_text(pdf_reader_fixed, range_)
3980
else:
40-
rsp = unpack.from_file(str(path))['content']
81+
try:
82+
file_ = unpack.from_file(str(path))
83+
if 'content' in file_:
84+
rsp = file_['content']
85+
else:
86+
rsp = str(file_)
87+
except Exception as e:
88+
print(f'Error reading file: {e} | {path}')
89+
raise e
4190

4291
output_handler = kwargs['output_handler'] if 'output_handler' in kwargs else None
4392
if output_handler:

symai/backend/engine_gptX_chat.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def compute_remaining_tokens(self, prompts: dict) -> int:
3232
# iterate over prompts and compute number of tokens
3333
prompts_ = [role['content'] for role in prompts]
3434
prompt = ''.join(prompts_)
35-
val = len(self.tokenizer.encode(prompt))
35+
val = len(self.tokenizer.encode(prompt, disallowed_special=()))
3636
return int((self.max_tokens - val) * 0.98)
3737

3838
def forward(self, prompts: List[str], *args, **kwargs) -> List[str]:
@@ -55,14 +55,14 @@ def forward(self, prompts: List[str], *args, **kwargs) -> List[str]:
5555

5656
try:
5757
res = openai.ChatCompletion.create(model=model,
58-
messages=prompts_,
59-
max_tokens=max_tokens,
60-
temperature=temperature,
61-
frequency_penalty=frequency_penalty,
62-
presence_penalty=presence_penalty,
63-
top_p=top_p,
64-
stop=stop,
65-
n=1)
58+
messages=prompts_,
59+
max_tokens=max_tokens,
60+
temperature=temperature,
61+
frequency_penalty=frequency_penalty,
62+
presence_penalty=presence_penalty,
63+
top_p=top_p,
64+
stop=stop,
65+
n=1)
6666
output_handler = kwargs['output_handler'] if 'output_handler' in kwargs else None
6767
if output_handler:
6868
output_handler(res)

symai/backend/engine_gptX_completion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def command(self, wrp_params):
3131
def compute_remaining_tokens(self, prompts: list) -> int:
3232
# iterate over prompts and compute number of tokens
3333
prompt = prompts[0]
34-
val = len(self.tokenizer.encode(prompt))
34+
val = len(self.tokenizer.encode(prompt, disallowed_special=()))
3535
return int((self.max_tokens - val) * 0.98)
3636

3737
def forward(self, prompts: List[str], *args, **kwargs) -> List[str]:

symai/backend/mixin/openai.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,6 @@ def api_max_tokens(self):
5656
self.model == 'babbage' or \
5757
self.model == 'ada':
5858
return 2_049
59+
60+
elif self.model == 'text-embedding-ada-002':
61+
return 8_191

symai/extended/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1+
from .arxiv_pdf_parser import *
2+
from .conversation import *
13
from .crawler import *
24
from .document import *
5+
from .file_merger import *
36
from .graph import *
47
from .html_style_template import *
58
from .packages import *
6-
from .file_merger import *
79
from .repo_cloner import *
810
from .solver import *
911
from .summarizer import *
10-
from .conversation import *
11-
from .arxiv_pdf_parser import *

symai/extended/arxiv_pdf_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def __init__(self, url_pattern: str = r'https://arxiv.org/(?:pdf|abs)/(\d+.\d+)(
1515
self.url_pattern = url_pattern
1616
self.merger = FileMerger()
1717

18-
def forward(self, data: Symbol) -> Symbol:
18+
def forward(self, data: Symbol, **kwargs) -> Symbol:
1919
# Extract all urls from the data
2020
urls = re.findall(self.url_pattern, str(data))
2121

@@ -39,7 +39,7 @@ def forward(self, data: Symbol) -> Symbol:
3939
print('%r generated an exception: %s' % (url, exc))
4040

4141
# Merge all pdfs into one file
42-
merged_file = self.merger(output_path)
42+
merged_file = self.merger(output_path, **kwargs)
4343

4444
# Return the merged file as a Symbol
4545
return_file = self._to_symbol(merged_file)

symai/extended/document.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44

55

66
class DocumentRetriever(Expression):
7-
def __init__(self, file_path: str, index_name: str = Indexer.DEFAULT):
7+
def __init__(self, file_path: str, index_name: str = Indexer.DEFAULT, **kwargs):
88
super().__init__()
99
reader = FileReader()
1010
indexer = Indexer(index_name=index_name)
11-
text = reader(file_path)
12-
self.index = indexer(text)
11+
text = reader(file_path, **kwargs)
12+
self.index = indexer(text, **kwargs)
1313

1414
def forward(self, query: Optional[Symbol]) -> Symbol:
1515
return self.index(query)

symai/extended/file_merger.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def __init__(self, file_endings: List[str] = ['.py', '.md', '.txt', '.sh', '.pdf
1616
self.file_excludes = file_excludes
1717
self.reader = FileReader()
1818

19-
def forward(self, root_path: str) -> Symbol:
19+
def forward(self, root_path: str, **kwargs) -> Symbol:
2020
"""
2121
Method to find, read, merge and return contents of files in the form of a Symbol starting from the root_path.
2222
@@ -37,7 +37,7 @@ def forward(self, root_path: str) -> Symbol:
3737
# Look only for files with the specified endings
3838
if file.endswith(tuple(self.file_endings)):
3939
# Read in the file using the FileReader
40-
file_content = self.reader(file_path).value
40+
file_content = self.reader(file_path, **kwargs).value
4141

4242
# Append start and end markers for each file
4343
file_content = f"# ----[FILE_START] {file_path}\n" + \

symai/extended/graph.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,12 @@ class Graph(Expression):
2626
def static_context(self) -> str:
2727
return GRAPH_DESCRIPTION
2828

29-
def __init__(self, formatter: Callable = SentenceFormatter(), n_workers: int = 4):
29+
def __init__(self, formatter: Callable = SentenceFormatter(), n_workers: int = 1, verbose: bool = False):
3030
super().__init__()
31-
self.formatter = formatter
32-
self.n_workers = n_workers
31+
self.formatter = formatter
32+
self.n_workers = n_workers
3333
self.sym_return_type = Graph
34+
self.verbose = verbose
3435

3536
def process_symbol(self, s, *args, **kwargs):
3637
res = ''
@@ -49,6 +50,7 @@ def _func(_, text) -> str:
4950
pass
5051

5152
if len(str(s)) > 0:
53+
if self.verbose: print(s)
5254
r = _func(self, s)
5355
rec = str(r)
5456
lines = rec.split('\n')
@@ -57,18 +59,27 @@ def _func(_, text) -> str:
5759
if len(l) > 0:
5860
csv = l.split(',')
5961
try:
60-
if len(csv) == 3 and csv[0].strip() != '' and csv[1].strip() != '' and csv[2].strip() > 0:
61-
test_ = int(csv[-1])
62+
if len(csv) == 3 and \
63+
csv[0].strip() != '' and \
64+
csv[1].strip() != '' and \
65+
int(csv[2].strip()) > 0:
6266
res += l + '\n'
63-
except:
67+
except Exception as e:
68+
if self.verbose: print(e)
6469
pass
6570
return res
6671

6772
def forward(self, sym: Symbol, **kwargs) -> Symbol:
6873
res = 'source,target,value\n'
6974
sym_list = self.formatter(sym).value
75+
if self.n_workers == 1:
76+
for s in sym_list:
77+
res += self.process_symbol(s)
78+
return res
7079
with Pool(self.n_workers) as p:
7180
results = p.map(self.process_symbol, sym_list)
7281
for r in results:
7382
res += r
7483
return res
84+
85+

symai/extended/os_command.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import platform
2+
import subprocess
3+
from typing import Callable, Dict, Iterator, List, Optional, Type
4+
5+
from ..components import Function
6+
from ..post_processors import CodeExtractPostProcessor
7+
from ..symbol import Expression, Symbol
8+
9+
# Prompt template sent to the model. It is rendered with str.format() and
# expects the fields: programs, platform, query and metadata.
Context = """[DESCRIPTION]:
Adapt the user query to an OS platform command (commands must be executable in terminal, shell, bash or powershell)!
Create only command adaptations based on programs that are specified in the programs list or are native platform commands!

[PROGRAM_LIST]:

{programs}
For other command requests, reply sorry not supported.

[PLATFORM]:

The currently detected OS platform is:
{platform}
ONLY CREATE A COMMAND FOR THE CURRENT PLATFORM!

[USER_QUERY]:

{query}

[METADATA]:

Metadata is OPTIONAL and can help with the specificity of the command.
{metadata}

---------------------------

[EXAMPLES]:

If the current platform is Windows, and the user query is: "open file.txt", then the command should be:
```powershell
notepad file.txt
```
If the current platform is Linux, and the user query is: "open file.txt", then the command should be:
```shell
gedit file.txt
```
If the current platform is Mac, and the user query is: "open file.txt", then the command should be:
```bash
open file.txt
```
If the current platform is Windows, and the user query requires to open Spotify and play Taylor Swift, and Spotify is in the programs list, then the command could look like:
```powershell
Start-Process 'spotify:track:Anti-Hero%20by%20Taylor%20Swift'
```
If the current platform is Windows, and the user query requires to open Spotify and play a song, and Spotify is in the programs list, and metadata is added, then the command could look like:
```powershell
Start-Process 'spotify:track:https://open.spotify.com/track/0V3wPSX9ygBnCm8psDIegu?si=81646e6079d34526'
```

---------------------------

Write an executable command that starts a process according to the user query, platform and the programs list. The command should be one line and should be directly executable in terminal, shell, bash or powershell.
"""
62+
63+
64+
class OSCommand(Expression):
65+
def __init__(self, programs: List[str],
66+
metadata: Dict[str, str] = {},
67+
verbose: bool = False,
68+
os_platform: str = 'auto'):
69+
super().__init__()
70+
self.verbose: bool = verbose
71+
self.os_platform: str = os_platform
72+
self.programs: List[str] = programs
73+
self.metadata: Dict[str, str] = metadata
74+
75+
if self.os_platform == 'auto':
76+
self.os_platform = platform.platform()
77+
if len(programs) == 0:
78+
raise Exception('No programs specified!')
79+
80+
def execute_os_command(self, *args, **kwargs):
81+
command = args[0]
82+
print(f'Executing {self.os_platform} command: {command}')
83+
if 'linux' in self.os_platform.lower():
84+
return [subprocess.run(["bash", "-c", str(command)])]
85+
elif 'windows' in self.os_platform.lower():
86+
return [subprocess.run(["powershell", "-Command", str(command)])]
87+
elif 'mac' in self.os_platform.lower():
88+
return [subprocess.run(["bash", "-c", str(command)])]
89+
else:
90+
raise Exception('Unsupported platform!')
91+
92+
def forward(self, sym: Symbol, **kwargs) -> Expression:
93+
sym = self._to_symbol(sym)
94+
kwargs['verbose'] = self.verbose
95+
96+
prompt = Context.format(programs=self.programs,
97+
platform=self.os_platform,
98+
query=sym,
99+
metadata=self.metadata)
100+
command = sym.query(prompt, post_processors=[CodeExtractPostProcessor()], **kwargs)
101+
return self.sym_return_type(self.output(command, expr=self.execute_os_command))

symai/extended/repo_cloner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from pathlib import Path
22
from typing import Optional
33

4-
# from git import Repo
4+
from git import Repo
55

66
from .. import Expression
77

0 commit comments

Comments
 (0)