fix: change file size column from IntegerField to BigIntegerField to support files > 2GB (#14148)

### What problem does this PR solve?

Fixes #6034

Changes the `size` field in both `Document` and `File` models from
`IntegerField` (32-bit, max ~2GB) to `BigIntegerField` (64-bit, max
~9.2EB), and adds corresponding database migrations.

## Problem

When uploading a file larger than 2GB, the `size` value overflows a
32-bit signed integer (max 2,147,483,647). This causes:

- The stored `size` no longer reflects the real file size: it wraps around or
is capped at the 32-bit maximum (e.g., a 3GB file shows as 2,097,152 KB, i.e.
~2GB, in File Management).
- Subsequent file operations (e.g., download) fail because the corrupted
size leads to invalid storage lookups.

## Changes

- `Document.size`: `IntegerField` → `BigIntegerField`
- `File.size`: `IntegerField` → `BigIntegerField`
- Added `alter_db_column_type` migrations in `migrate_db()` for both
`document.size` and `file.size` columns to ensure existing deployments
are upgraded automatically.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Signed-off-by: noob <yixiao121314@outlook.com>
This commit is contained in:
euvre
2026-04-16 15:43:29 +08:00
committed by GitHub
parent 0cd49e14dd
commit 9a785b26bd

View File

@@ -899,7 +899,7 @@ class Document(DataBaseModel):
created_by = CharField(max_length=32, null=False, help_text="who created it", index=True)
name = CharField(max_length=255, null=True, help_text="file name", index=True)
location = CharField(max_length=255, null=True, help_text="where dose it store", index=True)
size = IntegerField(default=0, index=True)
size = BigIntegerField(default=0, index=True)
token_num = IntegerField(default=0, index=True)
chunk_num = IntegerField(default=0, index=True)
progress = FloatField(default=0, index=True)
@@ -924,7 +924,7 @@ class File(DataBaseModel):
created_by = CharField(max_length=32, null=False, help_text="who created it", index=True)
name = CharField(max_length=255, null=False, help_text="file name or folder name", index=True)
location = CharField(max_length=255, null=True, help_text="where dose it store", index=True)
size = IntegerField(default=0, index=True)
size = BigIntegerField(default=0, index=True)
type = CharField(max_length=32, null=False, help_text="file extension", index=True)
source_type = CharField(max_length=128, null=False, default="", help_text="where dose this document come from", index=True)
@@ -1648,6 +1648,8 @@ def migrate_db():
alter_db_add_column(migrator, "memory", "tenant_llm_id", IntegerField(null=True, help_text="id in tenant_llm", index=True))
alter_db_add_column(migrator, "user_canvas_version", "release", BooleanField(null=False, help_text="is released", default=False, index=True))
alter_db_add_column(migrator, "api_4_conversation", "version_title", CharField(max_length=255, null=True, help_text="canvas version title when session created", index=False))
alter_db_column_type(migrator, "document", "size", BigIntegerField(default=0, index=True))
alter_db_column_type(migrator, "file", "size", BigIntegerField(default=0, index=True))
logging.disable(logging.NOTSET)
# this is after re-enabling logging to allow logging changed user emails
migrate_add_unique_email(migrator)