2
2
import os
3
3
from abc import ABC , abstractmethod
4
4
from functools import cached_property
5
+ from typing import Literal
5
6
6
7
from google .cloud import bigquery
7
8
@@ -86,39 +87,24 @@ def _query_last_post(self) -> dict | None:
86
87
def _process_posts (self , posts : list [dict ]) -> list [dict ]: ...
87
88
88
89
def _dump_posts_to_bigquery (self , posts : list [dict ]) -> None :
89
- if not posts :
90
- logger .info ("No posts to dump!" )
91
- return
92
-
93
- job_config = bigquery .LoadJobConfig (
94
- schema = [
90
+ self ._dump_to_bigquery (
91
+ posts = posts ,
92
+ dump_type = "posts" ,
93
+ bq_schema_fields = [
95
94
bigquery .SchemaField ("id" , "STRING" , mode = "REQUIRED" ),
96
95
bigquery .SchemaField ("created_at" , "TIMESTAMP" , mode = "REQUIRED" ),
97
96
bigquery .SchemaField ("message" , "STRING" , mode = "REQUIRED" ),
98
97
],
99
- write_disposition = "WRITE_APPEND" ,
100
98
)
101
- try :
102
- job = self .bq_client .load_table_from_json (
103
- posts ,
104
- f"pycontw-225217.ods.{ self .POST_TABLE_NAME }" ,
105
- job_config = job_config ,
106
- )
107
- job .result ()
108
- except Exception :
109
- logger .exception ("Failed to dump posts to BigQuery: " )
110
- raise RuntimeError ("Failed to dump posts insights to BigQuery" )
111
99
112
100
@abstractmethod
113
101
def _process_posts_insights (self , posts : list [dict ]) -> list [dict ]: ...
114
102
115
103
def _dump_posts_insights_to_bigquery (self , posts : list [dict ]) -> None :
116
- if not posts :
117
- logger .info ("No post insights to dump!" )
118
- return
119
-
120
- job_config = bigquery .LoadJobConfig (
121
- schema = [
104
+ self ._dump_to_bigquery (
105
+ posts = posts ,
106
+ dump_type = "posts insights" ,
107
+ bq_schema_fields = [
122
108
bigquery .SchemaField ("post_id" , "STRING" , mode = "REQUIRED" ),
123
109
bigquery .SchemaField ("query_time" , "TIMESTAMP" , mode = "REQUIRED" ),
124
110
bigquery .SchemaField ("period" , "STRING" , mode = "REQUIRED" ),
@@ -127,6 +113,21 @@ def _dump_posts_insights_to_bigquery(self, posts: list[dict]) -> None:
127
113
bigquery .SchemaField ("retweet" , "INTEGER" , mode = "NULLABLE" ),
128
114
bigquery .SchemaField ("views" , "INTEGER" , mode = "NULLABLE" ),
129
115
],
116
+ )
117
+
118
+ def _dump_to_bigquery (
119
+ self ,
120
+ * ,
121
+ posts : list [dict ],
122
+ dump_type : Literal ["posts insights" , "posts" ],
123
+ bq_schema_fields : list [bigquery .SchemaField ],
124
+ ) -> None :
125
+ if not posts :
126
+ logger .info (f"No { dump_type } to dump!" )
127
+ return
128
+
129
+ job_config = bigquery .LoadJobConfig (
130
+ schema = bq_schema_fields ,
130
131
write_disposition = "WRITE_APPEND" ,
131
132
)
132
133
try :
@@ -137,5 +138,5 @@ def _dump_posts_insights_to_bigquery(self, posts: list[dict]) -> None:
137
138
)
138
139
job .result ()
139
140
except Exception :
140
- logger .exception ("Failed to dump posts insights to BigQuery: " )
141
- raise RuntimeError ("Failed to dump posts insights to BigQuery" )
141
+ logger .exception (f"Failed to dump { dump_type } to BigQuery: " )
142
+ raise RuntimeError (f"Failed to dump { dump_type } to BigQuery" )
0 commit comments