Added classes required for telemetry #572
Changes from 15 commits
client.py

@@ -1,6 +1,5 @@
 import time
 from typing import Dict, Tuple, List, Optional, Any, Union, Sequence

 import pandas

 try:
@@ -234,6 +233,13 @@ def read(self) -> Optional[OAuthToken]:
             server_hostname, **kwargs
         )

+        self.server_telemetry_enabled = True
+        self.client_telemetry_enabled = kwargs.get("enable_telemetry", False)
+        self.telemetry_enabled = (
+            self.client_telemetry_enabled and self.server_telemetry_enabled
+        )
+        telemetry_batch_size = kwargs.get("telemetry_batch_size", 200)
Review discussion on the default batch size:

- "Is there some basis for this 200 value? cc @vikrantpuppala"
- "The default telemetry batch size is 200 in the JDBC driver."
- "Can we fetch the hardcoded variable instead of hardcoding it ourselves? Consider this scenario: a developer changes the default value but forgets to change line 241; this will incorrectly populate the telemetry logs."
- "Also, why should we indicate the JDBC driver values here?"
- "Moreover, the populating of telemetry fields should be abstracted out of client.py."
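One way to address this feedback is a shared module-level default that both the connection code and the telemetry client read. A minimal sketch, assuming a hypothetical constants module; the name DEFAULT_TELEMETRY_BATCH_SIZE and its location are illustrative, not part of this PR:

# telemetry_constants.py (hypothetical module, for illustration only)
DEFAULT_TELEMETRY_BATCH_SIZE = 200  # mirrors the JDBC driver default

# client.py would then read the shared default instead of a literal:
# from databricks.sql.telemetry.telemetry_constants import DEFAULT_TELEMETRY_BATCH_SIZE
telemetry_batch_size = kwargs.get("telemetry_batch_size", DEFAULT_TELEMETRY_BATCH_SIZE)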
         user_agent_entry = kwargs.get("user_agent_entry")
         if user_agent_entry is None:
             user_agent_entry = kwargs.get("_user_agent_entry")
@@ -425,6 +431,9 @@ def _close(self, close_cursors=True) -> None:

         self.open = False

+        if hasattr(self, "telemetry_client"):
+            self.telemetry_client.close()

     def commit(self):
         """No-op because Databricks does not support transactions"""
         pass
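With these changes, telemetry is opt-in through connection keyword arguments. A minimal usage sketch; the hostname, HTTP path, and token are placeholders:

from databricks import sql

# enable_telemetry defaults to False; telemetry_batch_size defaults to 200.
connection = sql.connect(
    server_hostname="my-workspace.cloud.databricks.com",
    http_path="/sql/1.0/endpoints/1234567890abcdef",
    access_token="dapi-placeholder-token",
    enable_telemetry=True,
    telemetry_batch_size=100,
)
connection.close()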
DriverConnectionParameters.py (new file)

import json
from dataclasses import dataclass, asdict
from databricks.sql.telemetry.HostDetails import HostDetails
from databricks.sql.telemetry.enums.AuthMech import AuthMech
from databricks.sql.telemetry.enums.AuthFlow import AuthFlow
from databricks.sql.telemetry.enums.DatabricksClientType import DatabricksClientType


@dataclass
class DriverConnectionParameters:
    http_path: str
    mode: DatabricksClientType
    host_info: HostDetails
    auth_mech: AuthMech
    auth_flow: AuthFlow
    auth_scope: str
    discovery_url: str
    allowed_volume_ingestion_paths: str
    azure_tenant_id: str
    socket_timeout: int

    def to_json(self):
        return json.dumps(asdict(self))


# Part of TelemetryEvent, e.g.:
# connection_params = DriverConnectionParameters(
#     http_path="/sql/1.0/endpoints/1234567890abcdef",
#     mode=DatabricksClientType.THRIFT,
#     host_info=HostDetails(
#         host_url="https://my-workspace.cloud.databricks.com",
#         port=443,
#     ),
#     auth_mech=AuthMech.OAUTH,
#     auth_flow=AuthFlow.AZURE_MANAGED_IDENTITIES,
#     auth_scope="sql",
#     discovery_url="https://example-url",
#     allowed_volume_ingestion_paths="[]",
#     azure_tenant_id="1234567890abcdef",
#     socket_timeout=10000,
# )
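One caveat worth flagging: dataclasses.asdict() leaves Enum members as-is, so the json.dumps() call in to_json() raises TypeError whenever the enum-typed fields (mode, auth_mech, auth_flow) actually hold Enum members. A hedged workaround sketch, not part of the PR:

import json
from dataclasses import asdict
from enum import Enum

def telemetry_to_json(obj):
    # Serialize a telemetry dataclass, converting Enum members to their
    # string values (e.g. AuthMech.OAUTH -> "oauth") so json can encode them.
    return json.dumps(
        asdict(obj),
        default=lambda o: o.value if isinstance(o, Enum) else str(o),
    )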
DriverErrorInfo.py (new file)

import json
from dataclasses import dataclass, asdict


@dataclass
class DriverErrorInfo:
    error_name: str
    stack_trace: str

    def to_json(self):
        return json.dumps(asdict(self))


# Required for ErrorLogs, e.g.:
# error_info = DriverErrorInfo(
#     error_name="CONNECTION_ERROR",
#     stack_trace=(
#         "Connection failure while using the Databricks SQL Python connector. "
#         "Failed to connect to server: https://my-workspace.cloud.databricks.com\n"
#         "databricks.sql.exc.OperationalError: Connection refused: connect\n"
#         "at databricks.sql.thrift_backend.ThriftBackend.make_request(ThriftBackend.py:329)\n"
#         "at databricks.sql.thrift_backend.ThriftBackend.attempt_request(ThriftBackend.py:366)\n"
#         "at databricks.sql.thrift_backend.ThriftBackend.open_session(ThriftBackend.py:575)\n"
#         "at databricks.sql.client.Connection.__init__(client.py:69)\n"
#         "at databricks.sql.client.connect(connection.py:123)"
#     ),
# )
DriverSystemConfiguration.py (new file)

import json
from dataclasses import dataclass, asdict
from databricks.sql import __version__


@dataclass
class DriverSystemConfiguration:
    driver_version: str
    os_name: str
    os_version: str
    os_arch: str
    runtime_name: str
    runtime_version: str
    runtime_vendor: str
    client_app_name: str
    locale_name: str
    driver_name: str
    char_set_encoding: str

    def to_json(self):
        return json.dumps(asdict(self))


# Part of TelemetryEvent, e.g.:
# system_config = DriverSystemConfiguration(
#     driver_version="2.9.3",
#     os_name="Darwin",
#     os_version="24.4.0",
#     os_arch="arm64",
#     runtime_name="CPython",
#     runtime_version="3.13.3",
#     runtime_vendor="cpython",
#     client_app_name="databricks-sql-python",
#     locale_name="en_US",
#     driver_name="databricks-sql-python",
#     char_set_encoding="UTF-8",
# )
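The fields map naturally onto the standard platform, sys, and locale modules. A hedged sketch of how they might be populated at runtime; the exact sources are assumptions, not taken from this PR:

import locale
import platform
import sys
from databricks.sql import __version__
from databricks.sql.telemetry.DriverSystemConfiguration import DriverSystemConfiguration

system_config = DriverSystemConfiguration(
    driver_version=__version__,
    os_name=platform.system(),
    os_version=platform.release(),
    os_arch=platform.machine(),
    runtime_name=platform.python_implementation(),
    runtime_version=platform.python_version(),
    runtime_vendor=sys.implementation.name,
    client_app_name="databricks-sql-python",
    locale_name=locale.getlocale()[0] or "en_US",
    driver_name="databricks-sql-python",
    char_set_encoding=sys.getdefaultencoding(),
)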
DriverVolumeOperation.py (new file)

import json
from dataclasses import dataclass, asdict
from databricks.sql.telemetry.enums.DriverVolumeOperationType import (
    DriverVolumeOperationType,
)


@dataclass
class DriverVolumeOperation:
    volume_operation_type: DriverVolumeOperationType
    volume_path: str

    def to_json(self):
        return json.dumps(asdict(self))


# Part of TelemetryEvent, e.g.:
# volume_operation = DriverVolumeOperation(
#     volume_operation_type=DriverVolumeOperationType.LIST,
#     volume_path="/path/to/volume",
# )
FrontendLogContext.py (new file)

import json
from dataclasses import dataclass, asdict
from databricks.sql.telemetry.TelemetryClientContext import TelemetryClientContext


@dataclass
class FrontendLogContext:
    client_context: TelemetryClientContext

    def to_json(self):
        return json.dumps(asdict(self))


# Used in TelemetryFrontendLog, e.g.:
# frontend_log_context = FrontendLogContext(
#     client_context=TelemetryClientContext(
#         timestamp_millis=1716489600000,
#         user_agent="databricks-sql-python-test",
#     )
# )
FrontendLogEntry.py (new file)

import json
from dataclasses import dataclass, asdict
from databricks.sql.telemetry.TelemetryEvent import TelemetryEvent


@dataclass
class FrontendLogEntry:
    sql_driver_log: TelemetryEvent

    def to_json(self):
        return json.dumps(asdict(self))
HostDetails.py (new file)

import json
from dataclasses import dataclass, asdict


@dataclass
class HostDetails:
    host_url: str
    port: int

    def to_json(self):
        return json.dumps(asdict(self))


# Part of DriverConnectionParameters, e.g.:
# host_details = HostDetails(
#     host_url="https://my-workspace.cloud.databricks.com",
#     port=443,
# )
SqlExecutionEvent.py (new file)

import json
from dataclasses import dataclass, asdict
from databricks.sql.telemetry.enums.StatementType import StatementType
from databricks.sql.telemetry.enums.ExecutionResultFormat import ExecutionResultFormat


@dataclass
class SqlExecutionEvent:
    statement_type: StatementType
    is_compressed: bool
    execution_result: ExecutionResultFormat
    retry_count: int

    def to_json(self):
        return json.dumps(asdict(self))


# Part of TelemetryEvent, e.g.:
# sql_execution_event = SqlExecutionEvent(
#     statement_type=StatementType.QUERY,
#     is_compressed=True,
#     execution_result=ExecutionResultFormat.INLINE_ARROW,
#     retry_count=0,
# )
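A sketch of timing a statement and recording it as a SqlExecutionEvent; the wrapper function and the chosen field values are illustrative assumptions, and the latency is returned separately because operation_latency_ms lives on TelemetryEvent rather than here:

import time
from databricks.sql.telemetry.SqlExecutionEvent import SqlExecutionEvent
from databricks.sql.telemetry.enums.StatementType import StatementType
from databricks.sql.telemetry.enums.ExecutionResultFormat import ExecutionResultFormat

def run_with_telemetry(cursor, query):
    # Execute the query, measure wall-clock latency, and describe the run.
    start = time.time()
    cursor.execute(query)
    latency_ms = int((time.time() - start) * 1000)
    event = SqlExecutionEvent(
        statement_type=StatementType.QUERY,
        is_compressed=False,
        execution_result=ExecutionResultFormat.INLINE_ARROW,
        retry_count=0,
    )
    return event, latency_ms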
TelemetryClientContext.py (new file)

from dataclasses import dataclass, asdict
import json


@dataclass
class TelemetryClientContext:
    timestamp_millis: int
    user_agent: str

    def to_json(self):
        return json.dumps(asdict(self))


# Used in FrontendLogContext, e.g.:
# client_context = TelemetryClientContext(
#     timestamp_millis=1716489600000,
#     user_agent="databricks-sql-python-test",
# )
TelemetryEvent.py (new file)

import json
from dataclasses import dataclass, asdict
from databricks.sql.telemetry.DriverSystemConfiguration import DriverSystemConfiguration
from databricks.sql.telemetry.DriverConnectionParameters import (
    DriverConnectionParameters,
)
from databricks.sql.telemetry.DriverVolumeOperation import DriverVolumeOperation
from databricks.sql.telemetry.SqlExecutionEvent import SqlExecutionEvent
from databricks.sql.telemetry.DriverErrorInfo import DriverErrorInfo


@dataclass
class TelemetryEvent:
    session_id: str
    sql_statement_id: str
    system_configuration: DriverSystemConfiguration
    driver_connection_params: DriverConnectionParameters
    auth_type: str
    vol_operation: DriverVolumeOperation
    sql_operation: SqlExecutionEvent
    error_info: DriverErrorInfo
    operation_latency_ms: int

    def to_json(self):
        return json.dumps(asdict(self))
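Assembling a full event from the pieces above, reusing objects built in the earlier sketches; the placeholder ids and the use of None for sections that do not apply are assumptions, since the dataclass does not mark any field Optional:

from databricks.sql.telemetry.TelemetryEvent import TelemetryEvent

telemetry_event = TelemetryEvent(
    session_id="session-id-placeholder",
    sql_statement_id="statement-id-placeholder",
    system_configuration=system_config,           # DriverSystemConfiguration sketch above
    driver_connection_params=connection_params,   # a DriverConnectionParameters instance
    auth_type="oauth",
    vol_operation=None,                           # assumed omissible in practice
    sql_operation=sql_execution_event,            # a SqlExecutionEvent instance
    error_info=None,                              # assumed omissible in practice
    operation_latency_ms=42,
)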
TelemetryFrontendLog.py (new file)

import json
from dataclasses import dataclass, asdict
from databricks.sql.telemetry.FrontendLogContext import FrontendLogContext
from databricks.sql.telemetry.FrontendLogEntry import FrontendLogEntry


@dataclass
class TelemetryFrontendLog:
    workspace_id: int
    frontend_log_event_id: str
    context: FrontendLogContext
    entry: FrontendLogEntry

    def to_json(self):
        return json.dumps(asdict(self))
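A sketch of wrapping an event in the frontend-log envelope; generating the event id with uuid4, using a wall-clock millisecond timestamp, and the workspace id value are assumptions, not taken from this PR:

import time
import uuid
from databricks.sql.telemetry.TelemetryClientContext import TelemetryClientContext
from databricks.sql.telemetry.FrontendLogContext import FrontendLogContext
from databricks.sql.telemetry.FrontendLogEntry import FrontendLogEntry
from databricks.sql.telemetry.TelemetryFrontendLog import TelemetryFrontendLog

frontend_log = TelemetryFrontendLog(
    workspace_id=1234567890,
    frontend_log_event_id=str(uuid.uuid4()),
    context=FrontendLogContext(
        client_context=TelemetryClientContext(
            timestamp_millis=int(time.time() * 1000),
            user_agent="databricks-sql-python-test",
        )
    ),
    entry=FrontendLogEntry(sql_driver_log=telemetry_event),  # TelemetryEvent from the previous sketch
)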
TelemetryRequest.py (new file)

import json
from dataclasses import dataclass, asdict
from typing import List, Optional


@dataclass
class TelemetryRequest:
    uploadTime: int
    items: List[str]
    protoLogs: Optional[List[str]]

    def to_json(self):
        return json.dumps(asdict(self))
TelemetryResponse.py (new file)

import json
from dataclasses import dataclass, asdict
from typing import List, Optional


@dataclass
class TelemetryResponse:
    errors: List[str]
    numSuccess: int
    numProtoSuccess: int

    def to_json(self):
        return json.dumps(asdict(self))
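These two shapes describe the export payload and its acknowledgement. A hedged sketch of building a request from a batch of serialized frontend logs; whether logs belong in items or protoLogs is not specified here, so the choice of protoLogs is arbitrary:

import time
from databricks.sql.telemetry.TelemetryRequest import TelemetryRequest

# Serialize the TelemetryFrontendLog from the previous sketch with the
# enum-aware helper from the DriverConnectionParameters section.
batch = [telemetry_to_json(frontend_log)]

request = TelemetryRequest(
    uploadTime=int(time.time() * 1000),
    items=[],
    protoLogs=batch,
)
print(request.to_json())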
enums/AuthFlow.py (new file)

from enum import Enum


class AuthFlow(Enum):
    TOKEN_PASSTHROUGH = "token_passthrough"
    CLIENT_CREDENTIALS = "client_credentials"
    BROWSER_BASED_AUTHENTICATION = "browser_based_authentication"
    AZURE_MANAGED_IDENTITIES = "azure_managed_identities"
enums/AuthMech.py (new file)

from enum import Enum


class AuthMech(Enum):
    OTHER = "other"
    PAT = "pat"
    OAUTH = "oauth"
enums/DatabricksClientType.py (new file)

from enum import Enum


class DatabricksClientType(Enum):
    SEA = "SEA"
    THRIFT = "THRIFT"
enums/DriverVolumeOperationType.py (new file)

from enum import Enum


class DriverVolumeOperationType(Enum):
    TYPE_UNSPECIFIED = "type_unspecified"
    PUT = "put"
    GET = "get"
    DELETE = "delete"
    LIST = "list"
    QUERY = "query"
enums/ExecutionResultFormat.py (new file)

from enum import Enum


class ExecutionResultFormat(Enum):
    FORMAT_UNSPECIFIED = "format_unspecified"
    INLINE_ARROW = "inline_arrow"
    EXTERNAL_LINKS = "external_links"
    COLUMNAR_INLINE = "columnar_inline"
enums/StatementType.py (new file)

from enum import Enum


class StatementType(Enum):
    NONE = "none"
    QUERY = "query"
    SQL = "sql"
    UPDATE = "update"
    METADATA = "metadata"