@@ -47,3 +47,102 @@ This is the base class for all sources. It handles configuring the source and re
47
47
* ` default_topic ` : This method is called, in the main process, when a topic is not provided with the source.
48
48
49
49
For more information, see [ ` quixstreams.sources.base.BaseSource ` ] ( ../../api-reference/sources.md#BaseSource ) docstrings.
50
+
51
+ ## Custom Sources and Jupyter Notebook
52
+
53
+ Due to the multiprocessing nature of sources, writing a custom one in a Jupyter Notebook doesn't work out of the box.
54
+
55
+ Running this cell will produce a similar output as below:
56
+
57
+ ``` python
58
+ from quixstreams import Application
59
+ from quixstreams.sources import Source
60
+
61
+ import random
62
+ import time
63
+
64
+ class MySource (Source ):
65
+ def run (self ):
66
+ while self .running:
67
+ msg = self .serialize(key = " test" , value = random.randint(0 , 10000 ))
68
+
69
+ self .produce(
70
+ key = msg.key,
71
+ value = msg.value,
72
+ )
73
+ time.sleep(1 )
74
+
75
+ def main ():
76
+ app = Application(broker_address = " localhost:19092" )
77
+ source = MySource(name = " mysource" )
78
+
79
+ sdf = app.dataframe(source = source)
80
+ sdf.print(metadata = True )
81
+
82
+ app.run(sdf)
83
+
84
+ if __name__ == " __main__" :
85
+ main()
86
+ ```
87
+
88
+ ```
89
+ [2024-09-25 10:54:37,852] [INFO] [quixstreams] : Starting the Application with the config: broker_address="{'bootstrap.servers': 'localhost:19092'}" consumer_group="quixstreams-default" auto_offset_reset="latest" commit_interval=5.0s commit_every=0 processing_guarantee="at-least-once"
90
+ [2024-09-25 10:54:37,853] [INFO] [quixstreams] : Topics required for this application: "mysource"
91
+ [2024-09-25 10:54:37,855] [INFO] [quixstreams] : Creating a new topic "mysource" with config: "{'num_partitions': 1, 'replication_factor': 1, 'extra_config': {}}"
92
+ [2024-09-25 10:54:38,856] [INFO] [quixstreams] : Topic "mysource" has been created
93
+ [2024-09-25 10:54:38,857] [INFO] [quixstreams] : Validating Kafka topics exist and are configured correctly...
94
+ [2024-09-25 10:54:38,859] [INFO] [quixstreams] : Kafka topics validation complete
95
+ [2024-09-25 10:54:38,860] [INFO] [quixstreams] : Initializing state directory at "<project path>/state/quixstreams-default"
96
+ [2024-09-25 10:54:38,860] [INFO] [quixstreams] : Waiting for incoming messages
97
+ [2024-09-25 10:54:39,007] [INFO] [quixstreams] : Starting source mysource
98
+ Traceback (most recent call last):
99
+ File "<string>", line 1, in <module>
100
+ File "<env path>/lib/python3.12/multiprocessing/spawn.py", line 122, in spawn_main
101
+ exitcode = _main(fd, parent_sentinel)
102
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
103
+ File "<env path>/lib/python3.12/multiprocessing/spawn.py", line 132, in _main
104
+ self = reduction.pickle.load(from_parent)
105
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
106
+ AttributeError: Can't get attribute 'MySource' on <module '__main__' (<class '_frozen_importlib.BuiltinImporter'>)>
107
+ ```
108
+
109
+ To fix that, you need to define your custom source in a separate file.
110
+
111
+ ``` python
112
+ %% writefile source.py
113
+ # indicate to IPython we want to write this content to a file
114
+
115
+ from quixstreams.sources import Source
116
+
117
+ import time
118
+ import random
119
+
120
+ class MySource (Source ):
121
+ def run (self ):
122
+ while self .running:
123
+ msg = self .serialize(key = " test" , value = random.randint(0 , 10000 ))
124
+
125
+ self .produce(
126
+ key = msg.key,
127
+ value = msg.value,
128
+ )
129
+ time.sleep(1 )
130
+ ```
131
+
132
+ ``` python
133
+ from quixstreams import Application
134
+
135
+ from source import MySource
136
+
137
+ def main ():
138
+ app = Application(broker_address = " localhost:19092" )
139
+ source = MySource(name = " mysource" )
140
+
141
+ sdf = app.dataframe(source = source)
142
+ sdf.print(metadata = True )
143
+
144
+ app.run(sdf)
145
+
146
+ if __name__ == " __main__" :
147
+ main()
148
+ ```
0 commit comments