@@ -1824,6 +1824,146 @@ def _on_load_config(self, button):
1824
1824
except Exception as e :
1825
1825
print (f"❌ Error loading configuration: { str (e )} " )
1826
1826
1827
+ def _test_remote_connectivity (self , host , port , timeout = 5 ):
1828
+ """Test basic network connectivity to a remote host."""
1829
+ import socket
1830
+
1831
+ try :
1832
+ sock = socket .socket (socket .AF_INET , socket .SOCK_STREAM )
1833
+ sock .settimeout (timeout )
1834
+ result = sock .connect_ex ((host , port ))
1835
+ sock .close ()
1836
+ return result == 0
1837
+ except Exception :
1838
+ return False
1839
+
1840
+ def _test_ssh_connectivity (self , config , timeout = 10 ):
1841
+ """Test SSH connectivity with provided credentials."""
1842
+ try :
1843
+ import paramiko
1844
+
1845
+ ssh_client = paramiko .SSHClient ()
1846
+ ssh_client .set_missing_host_key_policy (paramiko .AutoAddPolicy ())
1847
+
1848
+ connect_kwargs = {
1849
+ "hostname" : config .get ("cluster_host" ),
1850
+ "port" : config .get ("cluster_port" , 22 ),
1851
+ "username" : config .get ("username" ),
1852
+ "timeout" : timeout ,
1853
+ }
1854
+
1855
+ # Use key file if provided
1856
+ key_file = config .get ("key_file" )
1857
+ if key_file :
1858
+ from pathlib import Path
1859
+
1860
+ key_path = Path (key_file ).expanduser ()
1861
+ if key_path .exists ():
1862
+ connect_kwargs ["key_filename" ] = str (key_path )
1863
+ else :
1864
+ return False
1865
+ elif config .get ("password" ):
1866
+ connect_kwargs ["password" ] = config .get ("password" )
1867
+ else :
1868
+ # Try with no authentication (for testing purposes)
1869
+ pass
1870
+
1871
+ ssh_client .connect (** connect_kwargs )
1872
+
1873
+ # Test a simple command
1874
+ stdin , stdout , stderr = ssh_client .exec_command ("echo 'test'" , timeout = 5 )
1875
+ output = stdout .read ().decode ().strip ()
1876
+ ssh_client .close ()
1877
+
1878
+ return output == "test"
1879
+
1880
+ except ImportError :
1881
+ print ("ℹ️ paramiko not available for SSH testing" )
1882
+ return False
1883
+ except Exception :
1884
+ return False
1885
+
1886
+ def _test_cloud_connectivity (self , cluster_type , config ):
1887
+ """Test cloud provider API connectivity."""
1888
+ try :
1889
+ if cluster_type == "aws" :
1890
+ return self ._test_aws_connectivity (config )
1891
+ elif cluster_type == "azure" :
1892
+ return self ._test_azure_connectivity (config )
1893
+ elif cluster_type == "gcp" :
1894
+ return self ._test_gcp_connectivity (config )
1895
+ else :
1896
+ return False
1897
+ except Exception :
1898
+ return False
1899
+
1900
+ def _test_aws_connectivity (self , config ):
1901
+ """Test AWS API connectivity."""
1902
+ try :
1903
+ import boto3
1904
+ from botocore .exceptions import NoCredentialsError , ClientError
1905
+
1906
+ # Try to create a session and list regions (minimal API call)
1907
+ session = boto3 .Session (profile_name = config .get ("aws_profile" ))
1908
+ ec2 = session .client (
1909
+ "ec2" , region_name = config .get ("aws_region" , "us-east-1" )
1910
+ )
1911
+
1912
+ # Simple API call to test connectivity
1913
+ ec2 .describe_regions (MaxResults = 1 )
1914
+ return True
1915
+
1916
+ except ImportError :
1917
+ print ("ℹ️ boto3 not available for AWS testing" )
1918
+ return False
1919
+ except (NoCredentialsError , ClientError ):
1920
+ return False
1921
+ except Exception :
1922
+ return False
1923
+
1924
+ def _test_azure_connectivity (self , config ):
1925
+ """Test Azure API connectivity."""
1926
+ try :
1927
+ from azure .identity import DefaultAzureCredential
1928
+ from azure .mgmt .resource import ResourceManagementClient
1929
+
1930
+ credential = DefaultAzureCredential ()
1931
+ subscription_id = config .get ("azure_subscription_id" )
1932
+
1933
+ if not subscription_id :
1934
+ return False
1935
+
1936
+ # Try to create a resource client and list resource groups
1937
+ resource_client = ResourceManagementClient (credential , subscription_id )
1938
+ list (resource_client .resource_groups .list (top = 1 ))
1939
+ return True
1940
+
1941
+ except ImportError :
1942
+ print ("ℹ️ Azure SDK not available for Azure testing" )
1943
+ return False
1944
+ except Exception :
1945
+ return False
1946
+
1947
+ def _test_gcp_connectivity (self , config ):
1948
+ """Test GCP API connectivity."""
1949
+ try :
1950
+ from google .cloud import resource_manager
1951
+
1952
+ project_id = config .get ("gcp_project_id" )
1953
+ if not project_id :
1954
+ return False
1955
+
1956
+ # Try to get project information
1957
+ client = resource_manager .Client ()
1958
+ project = client .fetch_project (project_id )
1959
+ return project is not None
1960
+
1961
+ except ImportError :
1962
+ print ("ℹ️ Google Cloud SDK not available for GCP testing" )
1963
+ return False
1964
+ except Exception :
1965
+ return False
1966
+
1827
1967
def _on_test_config (self , button ):
1828
1968
"""Test the current configuration."""
1829
1969
with self .status_output :
@@ -1860,7 +2000,7 @@ def _on_test_config(self, button):
1860
2000
print (f"- Username: { username } " )
1861
2001
print (f"- Cluster type: { cluster_type } " )
1862
2002
1863
- # Basic validation - in a real implementation, we might try to connect
2003
+ # Basic validation
1864
2004
if validate_hostname (host ) or validate_ip_address (host ):
1865
2005
print ("✅ Host format is valid" )
1866
2006
else :
@@ -1879,7 +2019,21 @@ def _on_test_config(self, button):
1879
2019
else :
1880
2020
print ("ℹ️ No SSH key specified (will use password auth)" )
1881
2021
1882
- print ("ℹ️ Configuration appears valid (connection not tested)" )
2022
+ # Attempt connectivity test
2023
+ print ("🔌 Testing connectivity..." )
2024
+ if self ._test_remote_connectivity (host , port ):
2025
+ print ("✅ Host is reachable" )
2026
+
2027
+ # For SSH-based clusters, try a basic SSH connection test
2028
+ if cluster_type == "ssh" :
2029
+ if self ._test_ssh_connectivity (config ):
2030
+ print ("✅ SSH connection successful" )
2031
+ else :
2032
+ print ("⚠️ SSH connection failed (check credentials)" )
2033
+ else :
2034
+ print ("✅ Basic connectivity confirmed" )
2035
+ else :
2036
+ print ("❌ Host is not reachable or connection timed out" )
1883
2037
1884
2038
elif cluster_type == "kubernetes" :
1885
2039
# Test Kubernetes configuration
@@ -1905,8 +2059,152 @@ def _on_test_config(self, button):
1905
2059
1906
2060
print ("✅ Kubernetes configuration appears valid" )
1907
2061
2062
+ elif cluster_type == "aws" :
2063
+ # Test AWS configuration
2064
+ region = config .get ("aws_region" , "us-east-1" )
2065
+ cluster_sub_type = config .get ("aws_cluster_type" , "ec2" )
2066
+
2067
+ print ("- Provider: Amazon Web Services" )
2068
+ print (f"- Region: { region } " )
2069
+ print (f"- Service: { cluster_sub_type .upper ()} " )
2070
+
2071
+ if cluster_sub_type == "eks" :
2072
+ cluster_name = config .get ("eks_cluster_name" , "" )
2073
+ if cluster_name :
2074
+ print (f"- EKS Cluster: { cluster_name } " )
2075
+ else :
2076
+ print ("⚠️ EKS cluster name not specified" )
2077
+
2078
+ # Check if AWS credentials might be available
2079
+ import os
2080
+
2081
+ if os .getenv ("AWS_ACCESS_KEY_ID" ) or os .getenv ("AWS_PROFILE" ):
2082
+ print ("✅ AWS credentials detected in environment" )
2083
+ else :
2084
+ print (
2085
+ "ℹ️ No AWS credentials detected (may use IAM roles or config files)"
2086
+ )
2087
+
2088
+ # Test AWS API connectivity
2089
+ print ("🔌 Testing AWS API connectivity..." )
2090
+ if self ._test_cloud_connectivity ("aws" , config ):
2091
+ print ("✅ AWS API connection successful" )
2092
+ else :
2093
+ print (
2094
+ "⚠️ AWS API connection failed (check credentials and region)"
2095
+ )
2096
+
2097
+ print ("✅ AWS configuration appears valid" )
2098
+
2099
+ elif cluster_type == "azure" :
2100
+ # Test Azure configuration
2101
+ region = config .get ("azure_region" , "eastus" )
2102
+ cluster_sub_type = config .get ("azure_cluster_type" , "vm" )
2103
+
2104
+ print ("- Provider: Microsoft Azure" )
2105
+ print (f"- Region: { region } " )
2106
+ print (f"- Service: { cluster_sub_type .upper ()} " )
2107
+
2108
+ if cluster_sub_type == "aks" :
2109
+ cluster_name = config .get ("aks_cluster_name" , "" )
2110
+ resource_group = config .get ("azure_resource_group" , "" )
2111
+ if cluster_name and resource_group :
2112
+ print (f"- AKS Cluster: { cluster_name } " )
2113
+ print (f"- Resource Group: { resource_group } " )
2114
+ else :
2115
+ print ("⚠️ AKS cluster name and resource group required" )
2116
+
2117
+ # Check if Azure credentials might be available
2118
+ import os
2119
+
2120
+ if os .getenv ("AZURE_CLIENT_ID" ) or os .getenv (
2121
+ "AZURE_SUBSCRIPTION_ID"
2122
+ ):
2123
+ print ("✅ Azure credentials detected in environment" )
2124
+ else :
2125
+ print (
2126
+ "ℹ️ No Azure credentials detected (may use Azure CLI or managed identity)"
2127
+ )
2128
+
2129
+ # Test Azure API connectivity
2130
+ print ("🔌 Testing Azure API connectivity..." )
2131
+ if self ._test_cloud_connectivity ("azure" , config ):
2132
+ print ("✅ Azure API connection successful" )
2133
+ else :
2134
+ print (
2135
+ "⚠️ Azure API connection failed (check credentials and subscription)"
2136
+ )
2137
+
2138
+ print ("✅ Azure configuration appears valid" )
2139
+
2140
+ elif cluster_type == "gcp" :
2141
+ # Test GCP configuration
2142
+ region = config .get ("gcp_region" , "us-central1" )
2143
+ cluster_sub_type = config .get ("gcp_cluster_type" , "compute" )
2144
+ project_id = config .get ("gcp_project_id" , "" )
2145
+
2146
+ print ("- Provider: Google Cloud Platform" )
2147
+ print (f"- Region: { region } " )
2148
+ print (f"- Service: { cluster_sub_type .upper ()} " )
2149
+
2150
+ if project_id :
2151
+ print (f"- Project ID: { project_id } " )
2152
+ else :
2153
+ print ("⚠️ GCP project ID not specified" )
2154
+
2155
+ if cluster_sub_type == "gke" :
2156
+ cluster_name = config .get ("gke_cluster_name" , "" )
2157
+ zone = config .get ("gcp_zone" , "" )
2158
+ if cluster_name :
2159
+ print (f"- GKE Cluster: { cluster_name } " )
2160
+ if zone :
2161
+ print (f"- Zone: { zone } " )
2162
+ else :
2163
+ print ("⚠️ GKE cluster name not specified" )
2164
+
2165
+ # Check if GCP credentials might be available
2166
+ import os
2167
+
2168
+ if os .getenv ("GOOGLE_APPLICATION_CREDENTIALS" ) or os .getenv (
2169
+ "GCLOUD_PROJECT"
2170
+ ):
2171
+ print ("✅ GCP credentials detected in environment" )
2172
+ else :
2173
+ print (
2174
+ "ℹ️ No GCP credentials detected (may use gcloud auth or service account)"
2175
+ )
2176
+
2177
+ # Test GCP API connectivity
2178
+ print ("🔌 Testing GCP API connectivity..." )
2179
+ if self ._test_cloud_connectivity ("gcp" , config ):
2180
+ print ("✅ GCP API connection successful" )
2181
+ else :
2182
+ print (
2183
+ "⚠️ GCP API connection failed (check credentials and project)"
2184
+ )
2185
+
2186
+ print ("✅ GCP configuration appears valid" )
2187
+
2188
+ elif cluster_type in ["lambda_cloud" , "huggingface_spaces" ]:
2189
+ # Test other cloud providers
2190
+ provider_name = cluster_type .replace ("_" , " " ).title ()
2191
+ print (f"- Provider: { provider_name } " )
2192
+
2193
+ # Basic validation for these providers
2194
+ api_key = config .get ("api_key" , "" )
2195
+ if api_key :
2196
+ print ("✅ API key provided" )
2197
+ else :
2198
+ print ("⚠️ API key may be required" )
2199
+
2200
+ print (f"✅ { provider_name } configuration appears valid" )
2201
+
1908
2202
else :
1909
2203
print (f"⚠️ Unknown cluster type: { cluster_type } " )
2204
+ print (
2205
+ "ℹ️ Supported types: local, ssh, slurm, pbs, sge, kubernetes, "
2206
+ "aws, azure, gcp, lambda_cloud, huggingface_spaces"
2207
+ )
1910
2208
1911
2209
# Test resource configuration
1912
2210
cores = config .get ("default_cores" , 4 )
0 commit comments