Skip to content

Commit 92cd391

Browse files
Merge pull request #2393 from solliancenet/cj-legacy-vectorization-updates
(0.9.7-beta135.1) Updates to legacy vectorization
2 parents 1e00947 + b975d60 commit 92cd391

39 files changed

+1101
-407
lines changed

.vscode/launch.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@
8484
"args": ["main:app","--reload", "--port", "8000"],
8585
"python": "${workspaceFolder}/src/python/CodeSessionAPI/.venv/Scripts/python.exe"
8686
},
87+
{
88+
"name": "Python: Vectorization Pipeline",
89+
"type": "debugpy",
90+
"request": "launch",
91+
"program": "${file}",
92+
"console": "integratedTerminal",
93+
"cwd" : "${workspaceFolder}/samples/vectorization-pipeline",
94+
"python": "${workspaceFolder}/samples/vectorization-pipeline/.venv/Scripts/python.exe",
95+
"justMyCode": true
96+
},
8797
{
8898
"name": "Python: Current File",
8999
"type": "debugpy",
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# This file is an example of the .env file that should be created in the same directory as this file.
2+
MANAGEMENT_API_SCOPE = "MANAGEMENT_API_CLIENT_ID/.default" # Replace MANAGEMENT_API_CLIENT_ID with the actual client ID of the Management API application.
3+
MANAGEMENT_API_ENDPOINT = "https://..." # Replace with the actual endpoint of the Management API application.
4+
FOUNDATIONALLM_INSTANCE_ID = "FOUNDATIONALLM_INSTANCE_ID" # Replace FOUNDATIONALLM_INSTANCE_ID with the actual FoundationaLLM instance identifier.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.env
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .management_client import ManagementClient
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
'''
2+
Management API client for FoundationaLLM.
3+
This client is used to interact with the FoundationaLLM Management API.
4+
'''
5+
6+
import requests
7+
from simple_jwt import jwt
8+
9+
from azure.identity import AzureCliCredential
10+
11+
12+
class ManagementClient:
    '''
    Management API client for FoundationaLLM.

    Sends authenticated requests to the FoundationaLLM Management API in order
    to create, activate, and inspect vectorization pipelines.
    '''

    def __init__(
        self,
        management_api_scope: str,
        management_api_endpoint: str,
        foundationallm_instance_id: str):
        '''
        Create the client and acquire an initial access token.

        :param management_api_scope: The scope requested for the access token.
        :param management_api_endpoint: The base URL of the Management API.
        :param foundationallm_instance_id: The FoundationaLLM instance identifier.
        :raises ValueError: If any of the settings is missing or empty.
        '''
        # Callers typically pass os.getenv(...) results, which are None when the
        # .env file is incomplete. Fail here with a clear message instead of
        # failing later inside the credential or the HTTP call.
        settings = {
            "management_api_scope": management_api_scope,
            "management_api_endpoint": management_api_endpoint,
            "foundationallm_instance_id": foundationallm_instance_id,
        }
        missing = [name for name, value in settings.items() if not value]
        if missing:
            raise ValueError(
                f"Missing required Management API settings: {', '.join(missing)}")

        self.__management_api_scope = management_api_scope
        self.__management_api_endpoint = management_api_endpoint
        self.__foundationallm_instance_id = foundationallm_instance_id

        self.__credential = AzureCliCredential()
        self.__token = self.__credential.get_token(self.__management_api_scope).token

        # Names of the profiles referenced by create_vectorization_pipeline.
        self.default_text_partitioning_profile = "text_partition_default"
        self.default_text_embedding_profile = "text_embedding_profile_gateway_embedding3large"

    def __ensure_valid_token(self):
        '''
        Ensure that the token is valid.
        If the token is missing or expired, get a new one.
        '''
        if not self.__token or jwt.is_expired(self.__token):
            self.__token = self.__credential.get_token(self.__management_api_scope).token

    def __build_url(self, route: str) -> str:
        '''
        Build the full URL of an instance-scoped Management API route.

        :param route: The route relative to the FoundationaLLM instance.
        :return: The absolute URL to call.
        '''
        # Tolerate a trailing slash on the endpoint and a leading slash on the
        # route so the resulting URL never contains an accidental "//".
        endpoint = self.__management_api_endpoint.rstrip("/")
        return f"{endpoint}/instances/{self.__foundationallm_instance_id}/{route.lstrip('/')}"

    def __object_id(self, provider: str, resource_type: str, resource_name: str) -> str:
        '''
        Build the object identifier of a resource in this FoundationaLLM instance.

        :param provider: The resource provider name (e.g. "FoundationaLLM.Vectorization").
        :param resource_type: The resource type name (e.g. "indexingProfiles").
        :param resource_name: The name of the resource.
        :return: The object identifier of the resource.
        '''
        return (
            f"/instances/{self.__foundationallm_instance_id}"
            f"/providers/{provider}/{resource_type}/{resource_name}"
        )

    def __post_request(self, route: str, data: dict) -> dict:
        '''
        Post a request to the Management API.
        :param route: The route to post to.
        :param data: The data to post.
        :return: The response from the API.
        :raises Exception: If the API does not answer with HTTP 200.
        '''
        self.__ensure_valid_token()

        headers = {
            "Authorization": f"Bearer {self.__token}",
            "Content-Type": "application/json"
        }

        response = requests.post(
            self.__build_url(route),
            headers=headers,
            json=data,
            timeout=60
        )

        if response.status_code != 200:
            raise Exception(f"Error posting to Management API: {response.text}")

        return response.json()

    def __get_request(self, route: str) -> dict:
        '''
        Send a GET request to the Management API.
        :param route: The route to get.
        :return: The response from the API.
        :raises Exception: If the API does not answer with HTTP 200.
        '''
        self.__ensure_valid_token()

        headers = {
            "Authorization": f"Bearer {self.__token}"
        }

        response = requests.get(
            self.__build_url(route),
            headers=headers,
            timeout=60
        )

        if response.status_code != 200:
            raise Exception(f"Error getting from Management API: {response.text}")

        return response.json()

    def create_vectorization_pipeline(
        self,
        pipeline_name: str,
        pipeline_description: str,
        data_source_name: str,
        vector_store_name: str
    ) -> dict:
        '''
        Create a vectorization pipeline that uses the default text partitioning
        and text embedding profiles of this client.

        :param pipeline_name: The name of the pipeline.
        :param pipeline_description: The description of the pipeline.
        :param data_source_name: The name of the data source the pipeline reads from.
        :param vector_store_name: The name of the indexing profile the pipeline writes to.
        :return: The response from the API.
        '''
        vectorization = "FoundationaLLM.Vectorization"

        return self.__post_request(
            f"providers/{vectorization}/vectorizationPipelines/{pipeline_name}",
            {
                "name": pipeline_name,
                "description": pipeline_description,
                "data_source_object_id": self.__object_id(
                    "FoundationaLLM.DataSource", "dataSources", data_source_name),
                "text_partitioning_profile_object_id": self.__object_id(
                    vectorization, "textPartitioningProfiles", self.default_text_partitioning_profile),
                "text_embedding_profile_object_id": self.__object_id(
                    vectorization, "textEmbeddingProfiles", self.default_text_embedding_profile),
                "indexing_profile_object_id": self.__object_id(
                    vectorization, "indexingProfiles", vector_store_name),
            }
        )

    def get_vectorization_pipeline(
        self,
        pipeline_name: str
    ) -> dict:
        '''
        Get a vectorization pipeline.

        :param pipeline_name: The name of the pipeline.
        :return: The response from the API.
        '''
        return self.__get_request(
            f"providers/FoundationaLLM.Vectorization/vectorizationPipelines/{pipeline_name}"
        )

    def activate_vectorization_pipeline(
        self,
        pipeline_name: str
    ) -> dict:
        '''
        Activate a vectorization pipeline.

        :param pipeline_name: The name of the pipeline.
        :return: The response from the API.
        '''
        return self.__post_request(
            f"providers/FoundationaLLM.Vectorization/vectorizationPipelines/{pipeline_name}/activate",
            {}
        )

    def get_vectorization_pipeline_execution(
        self,
        pipeline_name: str,
        execution_id: str
    ) -> dict:
        '''
        Get one execution of a vectorization pipeline.

        :param pipeline_name: The name of the pipeline.
        :param execution_id: The identifier of the pipeline execution.
        :return: The response from the API.
        '''
        return self.__get_request(
            f"providers/FoundationaLLM.Vectorization/vectorizationPipelines/{pipeline_name}/vectorizationPipelineExecutions/{execution_id}"
        )
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
'''
2+
This sample shows how to configure a vectorization pipeline using the FoundationaLLM Management API.
3+
'''
4+
5+
import os
6+
from dotenv import load_dotenv
7+
8+
from clients import ManagementClient
9+
10+
DATA_SOURCE_NAME = "default-storage"
VECTOR_STORE_NAME = "default-index"


def require_env(name: str) -> str:
    '''
    Return the value of a required environment variable.

    :param name: The name of the environment variable.
    :return: The non-empty value of the variable.
    :raises RuntimeError: If the variable is not set or is empty.
    '''
    value = os.getenv(name)
    if not value:
        # os.getenv returns None for a missing variable; stop here so the
        # Management API client is never built from incomplete settings.
        raise RuntimeError(
            f"Environment variable {name} is not set. "
            "Define it in the .env file or in the environment."
        )
    return value


def main() -> None:
    '''
    Create the sample vectorization pipeline and print the API response.
    '''
    # Load environment variables from .env file
    load_dotenv()

    management_client = ManagementClient(
        require_env("MANAGEMENT_API_SCOPE"),
        require_env("MANAGEMENT_API_ENDPOINT"),
        require_env("FOUNDATIONALLM_INSTANCE_ID")
    )

    result = management_client.create_vectorization_pipeline(
        "Test02",
        "Sample vectorization pipeline.",
        DATA_SOURCE_NAME,
        VECTOR_STORE_NAME
    )

    print(result)


if __name__ == "__main__":
    main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
azure-identity==1.21.0
2+
python-dotenv==1.1.0
3+
simple-jwt-decode==0.10.0
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
'''
2+
This sample shows how to activate a vectorization pipeline and inspect its latest execution using the FoundationaLLM Management API.
3+
'''
4+
5+
import os
6+
from dotenv import load_dotenv
7+
8+
from clients import ManagementClient
9+
10+
VECTORIZATION_PIPELINE_NAME = "Test02"


def require_env(name: str) -> str:
    '''
    Return the value of a required environment variable.

    :param name: The name of the environment variable.
    :return: The non-empty value of the variable.
    :raises RuntimeError: If the variable is not set or is empty.
    '''
    value = os.getenv(name)
    if not value:
        # os.getenv returns None for a missing variable; stop here so the
        # Management API client is never built from incomplete settings.
        raise RuntimeError(
            f"Environment variable {name} is not set. "
            "Define it in the .env file or in the environment."
        )
    return value


def main() -> None:
    '''
    Activate the sample pipeline, then print the pipeline and its latest execution.
    '''
    # Load environment variables from .env file
    load_dotenv()

    management_client = ManagementClient(
        require_env("MANAGEMENT_API_SCOPE"),
        require_env("MANAGEMENT_API_ENDPOINT"),
        require_env("FOUNDATIONALLM_INSTANCE_ID")
    )

    activation = management_client.activate_vectorization_pipeline(
        VECTORIZATION_PIPELINE_NAME
    )
    print(activation)

    pipeline = management_client.get_vectorization_pipeline(
        VECTORIZATION_PIPELINE_NAME
    )
    print(pipeline)

    # The latest execution identifier is optional on the pipeline resource, so
    # it can be absent or null (for example before any execution was recorded).
    # NOTE(review): the "resource" wrapper is taken from the original sample;
    # confirm it against the Management API response.
    latest_execution_id = (pipeline.get("resource") or {}).get("latest_execution_id")
    if not latest_execution_id:
        print(f"Pipeline {VECTORIZATION_PIPELINE_NAME} has no execution to report yet.")
        return

    execution = management_client.get_vectorization_pipeline_execution(
        VECTORIZATION_PIPELINE_NAME,
        latest_execution_id
    )
    print(execution)


if __name__ == "__main__":
    main()

src/FoundationaLLM.slnLaunch

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[
2+
{
3+
"Name": "Vectorization Worker and Gateway",
4+
"Projects": [
5+
{
6+
"Path": "dotnet\\VectorizationWorker\\VectorizationWorker.csproj",
7+
"Action": "Start"
8+
},
9+
{
10+
"Path": "dotnet\\GatewayAPI\\GatewayAPI.csproj",
11+
"Action": "Start"
12+
}
13+
]
14+
}
15+
]

src/dotnet/Common/Constants/ResourceProviders/VectorizationResourceProviderMetadata.cs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using FoundationaLLM.Common.Constants.Authorization;
22
using FoundationaLLM.Common.Models.ResourceProviders;
3+
using FoundationaLLM.Common.Models.ResourceProviders.Agent.AgentFiles;
34
using FoundationaLLM.Common.Models.ResourceProviders.Vectorization;
45
using FoundationaLLM.Common.Models.Vectorization;
56

@@ -36,7 +37,22 @@ public static class VectorizationResourceProviderMetadata
3637
new ResourceTypeAction(ResourceProviderActions.Purge, true, false, [
3738
new ResourceTypeAllowedTypes(HttpMethod.Post.Method, AuthorizableOperations.Delete, [], [], [typeof(ResourceProviderActionResult)])
3839
])
39-
]
40+
],
41+
SubTypes = new()
42+
{
43+
{
44+
VectorizationResourceTypeNames.VectorizationPipelineExecutions,
45+
new ResourceTypeDescriptor (
46+
VectorizationResourceTypeNames.VectorizationPipelineExecutions,
47+
typeof(VectorizationPipelineExecution))
48+
{
49+
AllowedTypes = [
50+
new ResourceTypeAllowedTypes(HttpMethod.Get.Method, AuthorizableOperations.Write, [], [], [typeof(ResourceProviderGetResult<VectorizationPipelineExecution>)]),
51+
],
52+
Actions = []
53+
}
54+
}
55+
}
4056
}
4157
},
4258
{

src/dotnet/Common/Constants/ResourceProviders/VectorizationResourceTypeNames.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ public static class VectorizationResourceTypeNames
1010
/// </summary>
1111
public const string VectorizationPipelines = "vectorizationPipelines";
1212

13+
/// <summary>
14+
/// Vectorization pipeline executions.
15+
/// </summary>
16+
public const string VectorizationPipelineExecutions = "vectorizationPipelineExecutions";
17+
1318
/// <summary>
1419
/// Vectorization requests.
1520
/// </summary>

src/dotnet/Common/Interfaces/IStorageService.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,21 @@ public interface IStorageService
5151
/// <returns></returns>
5252
Task WriteFileAsync(string containerName, string filePath, string fileContent, string? contentType, CancellationToken cancellationToken);
5353

54+
/// <summary>
55+
/// Updates a specified JSON file in the storage.
56+
/// </summary>
57+
/// <typeparam name="T">The type of the object persisted in the JSON file.</typeparam>
58+
/// <param name="containerName">The name of the container where the file is located.</param>
59+
/// <param name="filePath">The path of the file to update.</param>
60+
/// <param name="objectTransformer">A function that updates the object persisted in the JSON file.</param>
61+
/// <param name="cancellationToken">The cancellation token that signals that operations should be cancelled.</param>
62+
/// <returns></returns>
63+
Task UpdateJSONFileAsync<T>(
64+
string containerName,
65+
string filePath,
66+
Func<T, T> objectTransformer,
67+
CancellationToken cancellationToken) where T : class;
68+
5469
/// <summary>
5570
/// Deletes a file from storage.
5671
/// </summary>

src/dotnet/Common/Models/ResourceProviders/Vectorization/VectorizationRequest.cs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,20 @@ public class VectorizationRequest : ResourceBase
5252
[JsonPropertyName("pipeline_execution_id")]
5353
public string? PipelineExecutionId { get; set; }
5454

55+
/// <summary>
56+
/// Gets or sets the time when the pipeline execution started.
57+
/// </summary>
58+
[JsonPropertyOrder(53)]
59+
[JsonPropertyName("pipeline_execution_start")]
60+
public DateTimeOffset? PipelineExecutionStart { get; set; }
61+
5562
/// <summary>
5663
/// The <see cref="VectorizationProcessingState"/> indicating the current state of the vectorization request.
5764
/// </summary>
5865
[JsonPropertyOrder(100)]
5966
[JsonPropertyName("processing_state")]
6067
[JsonConverter(typeof(JsonStringEnumConverter))]
61-
public VectorizationProcessingState ProcessingState { get; set; }
68+
public VectorizationProcessingState ProcessingState { get; set; } = VectorizationProcessingState.New;
6269

6370
/// <summary>
6471
/// The time when the vectorization request started being processed.
@@ -179,10 +186,27 @@ public class VectorizationRequest : ResourceBase
179186
public bool Expired =>
180187
(DateTime.UtcNow - LastSuccessfulStepTime).TotalHours > 240;
181188

189+
/// <summary>
190+
/// Gets the date from the name of the vectorization request.
191+
/// </summary>
192+
/// <returns>The date parsed from the name.</returns>
193+
public DateOnly GetDateFromName() => GetDate(Name);
194+
182195
/// <summary>
183196
/// Set default property values.
184197
/// </summary>
185198
public VectorizationRequest() =>
186199
Type = "vectorization-request";
200+
201+
/// <summary>
/// Gets the date from the name of a vectorization request.
/// </summary>
/// <param name="vectorizationRequestName">The name of the vectorization request.</param>
/// <remarks>
/// The required format of the vectorization request name is "yyyyMMdd-..." (e.g., "20250502-abc").
/// The date prefix is parsed with the invariant culture, so the result does not depend on the
/// calendar or date settings of the culture the calling thread runs under.
/// </remarks>
/// <returns>The date parsed from the name.</returns>
/// <exception cref="FormatException">The text before the first '-' is not a date in the "yyyyMMdd" format.</exception>
public static DateOnly GetDate(string vectorizationRequestName) =>
    DateOnly.FromDateTime(
        DateTime.ParseExact(
            vectorizationRequestName.Split('-')[0],
            "yyyyMMdd",
            System.Globalization.CultureInfo.InvariantCulture));
187211
}
188212
}

src/dotnet/Common/Models/Vectorization/VectorizationPipeline.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ public class VectorizationPipeline : ResourceBase
5353
[JsonPropertyName("trigger_cron_schedule")]
5454
public string? TriggerCronSchedule { get; set; }
5555

56+
/// <summary>
57+
/// Gets or sets the identifier of the latest execution of the pipeline.
58+
/// </summary>
59+
[JsonPropertyName("latest_execution_id")]
60+
public string? LatestExecutionId { get; set; }
61+
5662
/// <summary>
5763
/// Set default property values.
5864
/// </summary>

0 commit comments

Comments
 (0)