Skip to content

Commit f729ea9

Browse files
committed
refactor: File model
1 parent 115e110 commit f729ea9

File tree

3 files changed

+74
-4
lines changed

3 files changed

+74
-4
lines changed

apps/common/utils/common.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
from ..exception.app_exception import AppApiException
2525
from ..models.db_model_manage import DBModelManage
26+
import hashlib
2627

2728

2829
def password_encrypt(row_password):
@@ -124,6 +125,7 @@ def get_file_content(path):
124125
content = file.read()
125126
return content
126127

128+
127129
def sub_array(array: List, item_num=10):
128130
result = []
129131
temp = []
@@ -270,3 +272,8 @@ def bulk_create_in_batches(model, data, batch_size=1000):
270272
batch = data[i:i + batch_size]
271273
model.objects.bulk_create(batch)
272274

275+
276+
def get_sha256_hash(_bytes):
    """Return the SHA-256 digest of ``_bytes`` as a lowercase hex string.

    :param _bytes: bytes-like object (``bytes``, ``bytearray``, ``memoryview``)
        whose content is hashed.
    :return: the 64-character hexadecimal digest.
    :raises TypeError: if ``_bytes`` is not bytes-like (for example ``str``
        or ``None``); hashlib rejects such input.
    """
    # The payload is already fully in memory, so the one-shot constructor is
    # equivalent to creating the hash object and calling update() once.
    return hashlib.sha256(_bytes).hexdigest()
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Generated by Django 5.2 on 2025-05-07 03:40
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
# Schema migration for the `file` model: removes `workspace_id` and adds
# `file_size`, `sha256_hash`, `source_id` and `source_type`.
7+
8+
# Must run after migration 0004 of the `knowledge` app.
dependencies = [
9+
('knowledge', '0004_knowledge_file_size_limit_alter_document_status_and_more'),
10+
]
11+
12+
operations = [
13+
# Drop the `workspace_id` field from `file`.
migrations.RemoveField(
14+
model_name='file',
15+
name='workspace_id',
16+
),
17+
# Add `file_size`; existing rows receive the default 0.
migrations.AddField(
18+
model_name='file',
19+
name='file_size',
20+
field=models.IntegerField(default=0, verbose_name='文件大小'),
21+
),
22+
# Add `sha256_hash`; existing rows receive the default empty string.
# NOTE(review): CharField is declared without max_length here and below;
# presumably relies on a backend that allows unbounded varchar
# (e.g. PostgreSQL with Django 4.2+) - confirm against the target database.
migrations.AddField(
23+
model_name='file',
24+
name='sha256_hash',
25+
field=models.CharField(default='', verbose_name='文件sha256_hash标识'),
26+
),
27+
# Add `source_id`; existing rows receive 'TEMPORARY_100_MINUTE'.
migrations.AddField(
28+
model_name='file',
29+
name='source_id',
30+
field=models.CharField(default='TEMPORARY_100_MINUTE', verbose_name='资源id'),
31+
),
32+
# Add `source_type`, restricted to the listed choices and defaulting to
# 'TEMPORARY_100_MINUTE'.
# NOTE(review): the stored value 'TEMPORARY_100_MINUTE' carries the label
# 'Temporary 120 Minute' - confirm which duration is intended.
migrations.AddField(
33+
model_name='file',
34+
name='source_type',
35+
field=models.CharField(choices=[('KNOWLEDGE', 'Knowledge'), ('APPLICATION', 'Application'), ('TEMPORARY_30_MINUTE', 'Temporary 30 Minute'), ('TEMPORARY_100_MINUTE', 'Temporary 120 Minute'), ('TEMPORARY_1_DAY', 'Temporary 1 Day')], default='TEMPORARY_100_MINUTE', verbose_name='资源类型'),
36+
)
37+
]

apps/knowledge/models/knowledge.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
import uuid_utils.compat as uuid
44
from django.contrib.postgres.search import SearchVectorField
55
from django.db import models
6+
from django.db.models import QuerySet
67
from django.db.models.signals import pre_delete
78
from django.dispatch import receiver
89
from mptt.fields import TreeForeignKey
910
from mptt.models import MPTTModel
1011

1112
from common.db.sql_execute import select_one
1213
from common.mixins.app_model_mixin import AppModelMixin
14+
from common.utils.common import get_sha256_hash
1315
from models_provider.models import Model
1416
from users.models import User
1517

@@ -221,6 +223,19 @@ class SearchMode(models.TextChoices):
221223
blend = 'blend'
222224

223225

226+
class FileSourceType(models.TextChoices):
# Kinds of owner a stored file can have. Per the notes on each member, the
# kind determines when the file is expected to be removed.
227+
# Knowledge base: deleted together with its knowledge base; source_id is the knowledge base id
228+
KNOWLEDGE = "KNOWLEDGE"
229+
# Application: deleted together with its application; source_id is the application id
230+
APPLICATION = "APPLICATION"
231+
# Temporary, 30 minutes: data is cleaned up after 30 minutes; source_id is TEMPORARY_30_MINUTE
232+
TEMPORARY_30_MINUTE = "TEMPORARY_30_MINUTE"
233+
# Temporary, 120 minutes: data is cleaned up after 120 minutes; source_id is TEMPORARY_100_MINUTE
# NOTE(review): the member name says 120 minutes but the stored value says
# 100 - confirm which is intended. The value is persisted in the database,
# so changing it would need a data migration.
234+
TEMPORARY_120_MINUTE = "TEMPORARY_100_MINUTE"
235+
# Temporary, 1 day: data is cleaned up after 1 day; source_id is TEMPORARY_1_DAY
236+
TEMPORARY_1_DAY = "TEMPORARY_1_DAY"
237+
238+
224239
class VectorField(models.Field):
225240
def db_type(self, connection):
226241
return 'vector'
@@ -246,16 +261,25 @@ class Meta:
246261
class File(AppModelMixin):
247262
id = models.UUIDField(primary_key=True, max_length=128, default=uuid.uuid7, editable=False, verbose_name="主键id")
248263
file_name = models.CharField(max_length=256, verbose_name="文件名称", default="")
249-
workspace_id = models.CharField(max_length=64, verbose_name="工作空间id", default="default", db_index=True)
264+
file_size = models.IntegerField(verbose_name="文件大小", default=0)
265+
sha256_hash = models.CharField(verbose_name="文件sha256_hash标识", default="")
266+
source_type = models.CharField(verbose_name="资源类型", choices=FileSourceType,
267+
default=FileSourceType.TEMPORARY_120_MINUTE.value)
268+
source_id = models.CharField(verbose_name="资源id", default=FileSourceType.TEMPORARY_120_MINUTE.value)
250269
loid = models.IntegerField(verbose_name="loid")
251270
meta = models.JSONField(verbose_name="文件关联数据", default=dict)
252271

253272
class Meta:
254273
db_table = "file"
255274

256275
def save(self, bytea=None, force_insert=False, force_update=False, using=None, update_fields=None):
    """Save the row, storing ``bytea`` as a PostgreSQL large object.

    Content is de-duplicated by SHA-256 digest: when another ``File`` row
    already carries the same digest, its large object (``loid``) is reused
    instead of writing a second copy.

    :param bytea: raw file content. When ``None`` the large object and the
        digest are left untouched and only the model fields are saved.
    :param force_insert: forwarded to the parent ``save``.
    :param force_update: forwarded to the parent ``save``.
    :param using: forwarded to the parent ``save``.
    :param update_fields: forwarded to the parent ``save``.
    """
    if bytea is not None:
        digest = get_sha256_hash(bytea)
        # Record the digest on the row. Without this assignment every row
        # keeps the default '' and the lookup below can never find a match.
        self.sha256_hash = digest
        duplicate = QuerySet(File).filter(sha256_hash=digest).first()
        if duplicate is not None:
            # Same content already stored: share its large object.
            self.loid = duplicate.loid
        else:
            # Passing 0 as the OID lets PostgreSQL choose an unused one.
            result = select_one("SELECT lo_from_bytea(%s, %s::bytea) as loid", [0, bytea])
            self.loid = result['loid']
    # Forward the caller's options instead of silently dropping them.
    super().save(force_insert=force_insert, force_update=force_update, using=using,
                 update_fields=update_fields)
260284

261285
def get_bytes(self):
@@ -265,4 +289,6 @@ def get_bytes(self):
265289

266290
@receiver(pre_delete, sender=File)
def on_delete_file(sender, instance, **kwargs):
    """Unlink the large object of a ``File`` that is about to be deleted.

    Large objects can be shared between rows, so the object is only unlinked
    when no other ``File`` row references the same ``loid``.

    :param sender: the model class sending the signal (``File``).
    :param instance: the ``File`` row being deleted.
    :param kwargs: remaining signal arguments (unused).
    """
    still_referenced = QuerySet(File).filter(loid=instance.loid).exclude(id=instance.id).exists()
    if not still_referenced:
        # Bind the OID as a query parameter rather than formatting it into the SQL text.
        select_one('SELECT lo_unlink(%s::oid)', [instance.loid])

0 commit comments

Comments
 (0)