1010import static io .openlineage .client .OpenLineage .RunEvent .EventType .RUNNING ;
1111import static io .openlineage .client .OpenLineage .RunEvent .EventType .START ;
1212import static io .openlineage .spark .agent .util .TimeUtils .toZonedTime ;
13- import static java .util .Objects .isNull ;
1413
1514import io .openlineage .client .OpenLineage ;
1615import io .openlineage .client .OpenLineage .RunEvent ;
1716import io .openlineage .client .OpenLineage .RunEvent .EventType ;
1817import io .openlineage .client .OpenLineageClientUtils ;
1918import io .openlineage .spark .agent .EventEmitter ;
19+ import io .openlineage .spark .agent .NuEventEmitter ;
2020import io .openlineage .spark .agent .filters .EventFilterUtils ;
2121import io .openlineage .spark .agent .util .PlanUtils ;
2222import io .openlineage .spark .agent .util .ScalaConversionUtils ;
2323import io .openlineage .spark .api .OpenLineageContext ;
2424import io .openlineage .spark .api .naming .JobNameBuilder ;
2525
26- import java .lang .reflect .Field ;
2726import java .time .ZoneOffset ;
2827import java .time .ZonedDateTime ;
2928import java .util .*;
3029import java .util .concurrent .atomic .AtomicBoolean ;
31- import java .util .stream .Collectors ;
32- import java .util .stream .Stream ;
3330
3431import lombok .extern .slf4j .Slf4j ;
3532import org .apache .spark .scheduler .*;
@@ -60,10 +57,10 @@ class SparkSQLExecutionContext implements ExecutionContext {
6057
6158 private SparkSQLQueryParser sqlRecorder = new SparkSQLQueryParser ();
6259
/**
 * Job-name substrings used as an allow-list by {@code shouldEmit}: an event is only
 * emitted when its job name contains at least one of these entries.
 */
63- private static final Set <String > NU_WANTED_EVENT_NAME_SUBSTRINGS = Set .of (
64- ".execute_insert_into_hadoop_fs_relation_command." ,
65- ".adaptive_spark_plan."
66- );
60+ // private static final Set<String> NU_WANTED_EVENT_NAME_SUBSTRINGS = Set.of(
61+ // ".execute_insert_into_hadoop_fs_relation_command.",
62+ // ".adaptive_spark_plan."
63+ // );
6764
6865 public SparkSQLExecutionContext (
6966 long executionId ,
@@ -76,54 +73,6 @@ public SparkSQLExecutionContext(
7673 this .runEventBuilder = runEventBuilder ;
7774 }
7875
/**
 * Decides whether the given run event should be sent to the emitter.
 *
 * <p>Returns {@code false} (and logs the reason at INFO) when the event type is
 * {@code RUNNING}, when the job name is {@code null}, or when the job name contains
 * none of the entries in {@code NU_WANTED_EVENT_NAME_SUBSTRINGS}; otherwise returns
 * {@code true}.
 *
 * @param event the run event to inspect
 * @return {@code true} if the event passes all three checks, {@code false} otherwise
 */
79- private static Boolean shouldEmit (RunEvent event ){
// RUNNING events are always dropped, regardless of job name.
80- if (RUNNING .equals (event .getEventType ())) {
81- log .info ("OpenLineage event is RUNNING and should not be emmited" );
82- return false ;
83- }
84-
// NOTE(review): event.getJob() is dereferenced without a null check — confirm it can never be null here.
85- String jobName = event .getJob ().getName ();
86- if (isNull (jobName )) {
87- log .info ("OpenLineage event has no job name should not be emitted" );
88- return false ;
89- }
90-
// Substring match against the allow-list; a job name matching no entry is filtered out.
91- if (NU_WANTED_EVENT_NAME_SUBSTRINGS .stream ().noneMatch (jobName ::contains )) {
92- log .info ("OpenLineage event has no lineage value and will not be emmited" );
93- return false ;
94- }
95-
96- return true ;
97- }
98-
/**
 * Tells whether the column lineage facet is kept for an event of the given type.
 *
 * @param eventType the type of the event being processed
 * @return {@code false} for {@code START} and {@code RUNNING}, {@code true} for any
 *     other value (including {@code null}, since the comparison is made from the constants)
 */
99- private static Boolean shouldKeepColumnLineageFacet (EventType eventType ) {
100- return !(START .equals (eventType ) || RUNNING .equals (eventType ));
101- }
102-
/**
 * Nulls out the {@code columnLineage} facet on every input and output dataset of the
 * event, unless {@code shouldKeepColumnLineageFacet} says the facet is kept for this
 * event type (in which case the method returns without touching the event).
 *
 * <p>The facet is cleared reflectively: the private {@code columnLineage} field of
 * {@code OpenLineage.DatasetFacets} is made accessible and set to {@code null} on each
 * dataset's facets object. Failures are logged, never thrown: an
 * {@code IllegalAccessException} is logged per dataset at WARN, and a missing field
 * ({@code NoSuchFieldException}) is logged once at ERROR.
 *
 * @param event the run event whose datasets are mutated in place
 */
103- private static void discardColumnLineage (RunEvent event ) {
104- if (shouldKeepColumnLineageFacet (event .getEventType ())) { return ; }
105-
106- log .info ("Discarding column lineage facet for event {}" , event .getEventType ());
107-
108- try {
// Looked up by name via reflection; a rename of this field is reported by the NoSuchFieldException handler below.
109- Field columnLineageFacetField = OpenLineage .DatasetFacets .class .getDeclaredField ("columnLineage" );
110- columnLineageFacetField .setAccessible (true );
// NOTE(review): getInputs()/getOutputs() are dereferenced without null checks — confirm they are never null.
// NOTE(review): the intermediate collect(...) to a list is not needed just to call forEach.
111- Stream
112- .concat (event .getInputs ().stream (), event .getOutputs ().stream ())
113- .collect (Collectors .toList ())
114- .forEach (dataset -> {
115- try {
116- log .info ("Discarding column lineage facet for dataset {} {} {}" , dataset .getClass ().getName (), dataset .getNamespace (), dataset .getName ());
// NOTE(review): Field.set throws NullPointerException if dataset.getFacets() is null — confirm facets are always present.
117- columnLineageFacetField .set (dataset .getFacets (), null );
118- } catch (IllegalAccessException e ) {
119- log .warn ("Failed to discard column lineage facet" , e );
120- }
121- });
122- } catch (NoSuchFieldException e ) {
123- log .error ("Failed to discard column lineage facet: columnLineage field not found at OpenLineage.DatasetFacets" , e );
124- }
125- }
126-
12776 @ Override
12877 public void start (SparkListenerSQLExecutionStart startEvent ) {
12978 if (log .isDebugEnabled ()) {
@@ -157,14 +106,8 @@ public void start(SparkListenerSQLExecutionStart startEvent) {
157106 .jobFacetsBuilder (getJobFacetsBuilder (olContext .getQueryExecution ().get ()))
158107 .build ());
159108
160- if (!shouldEmit (event )) {
161- return ;
162- }
163-
164- discardColumnLineage (event );
165-
166109 log .debug ("Posting event for start {}: {}" , executionId , event );
167- eventEmitter .emit (event );
110+ NuEventEmitter .emit (event , eventEmitter );
168111 }
169112
170113 @ Override
@@ -210,16 +153,10 @@ public void end(SparkListenerSQLExecutionEnd endEvent) {
210153 .jobFacetsBuilder (getJobFacetsBuilder (olContext .getQueryExecution ().get ()))
211154 .build ());
212155
213- if (!shouldEmit (event )) {
214- return ;
215- }
216-
217- discardColumnLineage (event );
218-
219156 if (log .isDebugEnabled ()) {
220157 log .debug ("Posting event for end {}: {}" , executionId , OpenLineageClientUtils .toJson (event ));
221158 }
222- eventEmitter .emit (event );
159+ NuEventEmitter .emit (event , eventEmitter );
223160 }
224161
225162 // TODO: not invoked until https://github.com/OpenLineage/OpenLineage/issues/470 is completed
@@ -249,14 +186,8 @@ public void start(SparkListenerStageSubmitted stageSubmitted) {
249186 .jobFacetsBuilder (getJobFacetsBuilder (olContext .getQueryExecution ().get ()))
250187 .build ());
251188
252- if (!shouldEmit (event )) {
253- return ;
254- }
255-
256- discardColumnLineage (event );
257-
258189 log .debug ("Posting event for stage submitted {}: {}" , executionId , event );
259- eventEmitter .emit (event );
190+ NuEventEmitter .emit (event , eventEmitter );
260191 }
261192
262193 // TODO: not invoked until https://github.com/OpenLineage/OpenLineage/issues/470 is completed
@@ -285,14 +216,8 @@ public void end(SparkListenerStageCompleted stageCompleted) {
285216 .jobFacetsBuilder (getJobFacetsBuilder (olContext .getQueryExecution ().get ()))
286217 .build ());
287218
288- if (!shouldEmit (event )) {
289- return ;
290- }
291-
292- discardColumnLineage (event );
293-
294219 log .debug ("Posting event for stage completed {}: {}" , executionId , event );
295- eventEmitter .emit (event );
220+ NuEventEmitter .emit (event , eventEmitter );
296221 }
297222
298223 @ Override
@@ -344,14 +269,8 @@ public void start(SparkListenerJobStart jobStart) {
344269 .jobFacetsBuilder (getJobFacetsBuilder (olContext .getQueryExecution ().get ()))
345270 .build ());
346271
347- if (!shouldEmit (event )) {
348- return ;
349- }
350-
351- discardColumnLineage (event );
352-
353272 log .debug ("Posting event for start {}: {}" , executionId , event );
354- eventEmitter .emit (event );
273+ NuEventEmitter .emit (event , eventEmitter );
355274 }
356275
357276 @ Override
@@ -399,14 +318,8 @@ public void end(SparkListenerJobEnd jobEnd) {
399318 .jobFacetsBuilder (getJobFacetsBuilder (olContext .getQueryExecution ().get ()))
400319 .build ());
401320
402- if (!shouldEmit (event )) {
403- return ;
404- }
405-
406- discardColumnLineage (event );
407-
408321 log .debug ("Posting event for end {}: {}" , executionId , event );
409- eventEmitter .emit (event );
322+ NuEventEmitter .emit (event , eventEmitter );
410323 }
411324
412325 @ Override
0 commit comments