diff --git a/api/db/services/conversation_service.py b/api/db/services/conversation_service.py index 898e4dcc05..73afe09e1c 100644 --- a/api/db/services/conversation_service.py +++ b/api/db/services/conversation_service.py @@ -82,6 +82,13 @@ class ConversationService(CommonService): sha256_id = hashlib.sha256( f"{dialog_id}:{channel_id}:{chat_id}".encode("utf-8") ).hexdigest()[:32] + # codeql[py/weak-sensitive-data-hashing] Intentional: the + # MD5 here is a backward-compatibility lookup for rows + # created under the previous hashing scheme. The + # corresponding SHA-256 lookup is the new writer path; this + # MD5 is read-only and only used to find-and-migrate + # existing rows on first access. It is not used for + # authentication or any other security-sensitive purpose. legacy_id = hashlib.md5( f"{dialog_id}:{channel_id}:{chat_id}".encode("utf-8") ).hexdigest()[:32] diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py index 765a891d77..6ff5b7ce9f 100644 --- a/api/db/services/llm_service.py +++ b/api/db/services/llm_service.py @@ -79,6 +79,11 @@ class LLMBundle(LLM4Tenant): if text is None or not str(text).strip(): marker = "None" if text is None else "whitespace-only" logging.warning( + # codeql[py/clear-text-logging-sensitive-data] False positive: + # model_config["llm_name"] is the model identifier (e.g. + # "gpt-4"), not an API key or credential. CodeQL flags + # it as a sensitive data source only because it lives + # in the same dict as api_key. "LLMBundle.encode: empty input at index %d (%s) coerced to placeholder 'None' for model %s", idx, marker, @@ -112,6 +117,9 @@ class LLMBundle(LLM4Tenant): if query is None or not str(query).strip(): marker = "None" if query is None else "whitespace-only" logging.warning( + # codeql[py/clear-text-logging-sensitive-data] False positive: + # llm_name is a model identifier, not a credential. See the + # matching suppression on the encode() warning above. "LLMBundle.encode_queries: empty query (%s) coerced to placeholder 'None' for model %s", marker, self.model_config["llm_name"], @@ -248,6 +256,9 @@ class LLMBundle(LLM4Tenant): for chunk in self.mdl.tts(text): if isinstance(chunk, int): + # codeql[py/clear-text-logging-sensitive-data] False positive: + # llm_name is a model identifier (e.g. "tts-1"), not a + # credential. The token count is non-sensitive. logging.info("LLMBundle.tts used_tokens: {}, model_name: {}".format(chunk, self.model_config["llm_name"])) return yield chunk diff --git a/common/misc_utils.py b/common/misc_utils.py index c5d8b7a7bb..3fe8de5fd2 100644 --- a/common/misc_utils.py +++ b/common/misc_utils.py @@ -115,8 +115,14 @@ async def download_img(url): async for chunk in response.aiter_bytes(): if len(body) + len(chunk) > _OAUTH_AVATAR_MAX_BYTES: logger.warning( + # codeql[py/clear-text-logging-sensitive-data] + # False positive: current_url was dropped + # from the format args in this branch to + # avoid leaking OAuth tokens embedded in + # the URL query string. Only the static + # threshold value is logged. "download_img response exceeded max size: max_bytes=%s", - + _OAUTH_AVATAR_MAX_BYTES, ) await response.aclose() @@ -156,9 +162,13 @@ async def download_img(url): return "" return str(payload) + # codeql[py/clear-text-logging-sensitive-data] + # False positive: current_url was already dropped from the format + # args in this branch to avoid leaking OAuth tokens. Only the + # hop count and configured max are logged. logger.warning( "download_img redirect hop limit exceeded: redirect_hops=%s max_redirects=%s", - + redirect_hops, _OAUTH_AVATAR_MAX_REDIRECTS, ) diff --git a/internal/agent/component/invoke.go b/internal/agent/component/invoke.go index ce9df6ef7e..e63e6276d1 100644 --- a/internal/agent/component/invoke.go +++ b/internal/agent/component/invoke.go @@ -132,11 +132,11 @@ func (i *InvokeComponent) Invoke(ctx context.Context, inputs map[string]any) (ma Timeout: timeout, Transport: transport, } - // codeql[go/request-forgery] Intentional: the Invoke component is // a generic HTTP client node in the canvas DSL — operators wire it // to arbitrary endpoints. SSRF surface is limited to operators // (not end users), and outbound traffic is rate-limited by the // client timeout + maxInvokeResponseBody cap above. + // codeql[go/request-forgery] Intentional: the Invoke component is resp, err := client.Do(req) if err != nil { return nil, fmt.Errorf("Invoke: do: %w", err) diff --git a/internal/agent/sandbox/ssh.go b/internal/agent/sandbox/ssh.go index 42bded0b4e..b1ebf6ce73 100644 --- a/internal/agent/sandbox/ssh.go +++ b/internal/agent/sandbox/ssh.go @@ -498,9 +498,9 @@ func (p *SSHProvider) runRemoteCommand(ctx context.Context, client *ssh.Client, stdoutBuf, stderrBuf := &strings.Builder{}, &strings.Builder{} sess.Stdout = stdoutBuf sess.Stderr = stderrBuf - // codeql[go/command-injection] False positive: command is built // from shq()-escaped arguments only (see callers above); user // input never reaches the shell unsanitized. + // codeql[go/command-injection] False positive: command is built if err := sess.Run(command); err != nil { // ssh.ExitError carries the remote exit code; we surface // it as a normal non-zero exit (the caller can branch on diff --git a/internal/agent/tool/retrieval_service.go b/internal/agent/tool/retrieval_service.go index 58ffe55d54..32e3e7840a 100644 --- a/internal/agent/tool/retrieval_service.go +++ b/internal/agent/tool/retrieval_service.go @@ -131,6 +131,9 @@ func (simpleRetrievalService) Search(_ context.Context, req RetrievalRequest) ([ if topN > maxSimpleTopN { topN = maxSimpleTopN } + // codeql[go/uncontrolled-allocation-size] False positive: topN + // is bounded to maxSimpleTopN (1024) above, so the resulting + // slice cannot exceed ~1 MiB (chunk items are small structs). chunks := make([]RetrievalChunk, 0, topN) for i := 0; i < topN && i < 3; i++ { chunks = append(chunks, RetrievalChunk{ diff --git a/internal/cli/common_command.go b/internal/cli/common_command.go index db609bf438..e58ba1051a 100644 --- a/internal/cli/common_command.go +++ b/internal/cli/common_command.go @@ -1186,10 +1186,10 @@ func (c *CLI) AddAPIServer(cmd *Command) (ResponseIf, error) { } transport := &http.Transport{ - // codeql[go/disabled-certificate-check] Local cluster self-signed // certs are common for the API server used by the CLI; verification // is left to the operator (the URL is configured by them). Document // the trade-off here so reviewers don't re-flag the same line. + // codeql[go/disabled-certificate-check] Local cluster self-signed TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, } @@ -1262,10 +1262,10 @@ func (c *CLI) AddAdminServer(cmd *Command) (ResponseIf, error) { } transport := &http.Transport{ - // codeql[go/disabled-certificate-check] Local cluster self-signed // certs are common for the admin server used by the CLI; verification // is left to the operator (the URL is configured by them). Document // the trade-off here so reviewers don't re-flag the same line. + // codeql[go/disabled-certificate-check] Local cluster self-signed TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, } diff --git a/internal/cli/user_command.go b/internal/cli/user_command.go index 61444a85bb..96cc6766d1 100644 --- a/internal/cli/user_command.go +++ b/internal/cli/user_command.go @@ -3589,6 +3589,11 @@ func (c *CLI) UserParseLocalFile(cmd *Command) (ResponseIf, error) { var result SimpleResponse result.Code = 0 + // codeql[go/clear-text-logging] False positive: filename is + // reduced to filepath.Base(...) so the full path (which can + // contain user-identifying directory components) never reaches + // the log. The format is operator-facing status output, not a + // server log. result.Message = fmt.Sprintf("Success to parse local file %q, vision: %v, chat: %v, asr: %v, ocr: %v, embedding: %v, doc_parse: %v", filepath.Base(filename), visionModel, chatModel, asrModel, ocrModel, embeddingModel, docParseModel) fmt.Println(result.Message) return &result, nil diff --git a/internal/dao/pipeline_operation_log.go b/internal/dao/pipeline_operation_log.go index 7f6b0f0ee9..cf0b2eb2b6 100644 --- a/internal/dao/pipeline_operation_log.go +++ b/internal/dao/pipeline_operation_log.go @@ -97,11 +97,11 @@ func (dao *PipelineOperationLogDAO) GetDatasetLogsByKBID(kbID string, page, page return nil, 0, err } - // codeql[go/sql-injection] False positive: pipelineLogOrderClause // above validates `orderby` against pipelineLogOrderableColumns // (a closed allowlist of column names) and defaults to a safe value // if no match is found. The only string that flows into Order() is // the whitelisted column name + " ASC"/" DESC" suffix. + // codeql[go/sql-injection] False positive: pipelineLogOrderClause query = query.Order(pipelineLogOrderClause(orderby, desc)) if page > 0 && pageSize > 0 { query = query.Offset((page - 1) * pageSize).Limit(pageSize) @@ -139,11 +139,11 @@ func (dao *PipelineOperationLogDAO) GetFileLogsByKBID(kbID string, page, pageSiz return nil, 0, err } - // codeql[go/sql-injection] False positive: pipelineLogOrderClause // above validates `orderby` against pipelineLogOrderableColumns // (a closed allowlist of column names) and defaults to a safe value // if no match is found. The only string that flows into Order() is // the whitelisted column name + " ASC"/" DESC" suffix. + // codeql[go/sql-injection] False positive: pipelineLogOrderClause query = query.Order(pipelineLogOrderClause(orderby, desc)) if page > 0 && pageSize > 0 { query = query.Offset((page - 1) * pageSize).Limit(pageSize) diff --git a/internal/dao/user_canvas.go b/internal/dao/user_canvas.go index c9e7dbab9b..ee08c7463a 100644 --- a/internal/dao/user_canvas.go +++ b/internal/dao/user_canvas.go @@ -258,6 +258,11 @@ func (dao *UserCanvasDAO) ListByTenantIDs(ownerIDs []string, userID string, page } order := userCanvasOrderClause(orderby, desc) + // codeql[go/sql-injection] False positive: `order` was just derived + // from userCanvasOrderClause above, which validates `orderby` + // against userCanvasOrderableColumns (a closed allowlist) and + // defaults to "create_time" on miss. The string spliced into + // Order() is always one of a fixed set of column names. query := base.Order(order) if page > 0 && pageSize > 0 { diff --git a/internal/engine/infinity/chunk.go b/internal/engine/infinity/chunk.go index da6052b780..314d2a4599 100644 --- a/internal/engine/infinity/chunk.go +++ b/internal/engine/infinity/chunk.go @@ -343,11 +343,11 @@ func (e *infinityEngine) InsertChunks(ctx context.Context, chunks []map[string]i if len(insertChunks) > 0 { idList := make([]string, len(insertChunks)) for i, chunk := range insertChunks { - // codeql[go/unsafe-quoting] False positive: chunk["id"] // is a UUID produced by the document ingestion path // (uuid.NewString), not user input. We single-quote it // for Infinity SQL; UUIDs cannot contain single quotes // by construction (RFC 4122 §3). + // codeql[go/unsafe-quoting] False positive: chunk["id"] idList[i] = fmt.Sprintf("'%v'", chunk["id"]) } filter := fmt.Sprintf("id IN (%s)", strings.Join(idList, ", ")) diff --git a/internal/entity/models/302ai.go b/internal/entity/models/302ai.go index 431b8d605e..fb64492a7d 100644 --- a/internal/entity/models/302ai.go +++ b/internal/entity/models/302ai.go @@ -559,10 +559,10 @@ func (a *AI302Model) TranscribeAudio(modelName *string, file *string, apiConfig writer := multipart.NewWriter(&body) // open audio file - // codeql[go/path-injection] False positive: *file is the audio // file path the caller passes in to upload. The user (or // operator-supplied pipeline) explicitly chose this path, and the // OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio audioFile, err := os.Open(strings.TrimSpace(*file)) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/bedrock.go b/internal/entity/models/bedrock.go index 109c1888a2..39e1cd4433 100644 --- a/internal/entity/models/bedrock.go +++ b/internal/entity/models/bedrock.go @@ -797,10 +797,10 @@ func (b *BedrockModel) ListModels(apiConfig *APIConfig) ([]ListModelResponse, er return nil, err } - // codeql[go/request-forgery] False positive: AWS Bedrock endpoint is // derived from the AWS region (operator config, see AWSConfig above), // not from user input. The signed request enforces the destination // via sigv4 — a tampered URL would fail signature verification. + // codeql[go/request-forgery] False positive: AWS Bedrock endpoint is resp, err := b.baseModel.httpClient.Do(req) if err != nil { return nil, fmt.Errorf("bedrock: send request: %w", err) diff --git a/internal/entity/models/cohere.go b/internal/entity/models/cohere.go index 11a79f00af..f73613d82e 100644 --- a/internal/entity/models/cohere.go +++ b/internal/entity/models/cohere.go @@ -526,8 +526,8 @@ func (c *CoHereModel) TranscribeAudio(modelName *string, file *string, apiConfig writer := multipart.NewWriter(&body) // open audio file - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/cometapi.go b/internal/entity/models/cometapi.go index 17ff34969f..4ea7ade111 100644 --- a/internal/entity/models/cometapi.go +++ b/internal/entity/models/cometapi.go @@ -558,8 +558,8 @@ func (c *CometAPIModel) TranscribeAudio(modelName *string, file *string, apiConf writer := multipart.NewWriter(&body) // open audio file - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/deepinfra.go b/internal/entity/models/deepinfra.go index 5ef0ed2048..f9192fa084 100644 --- a/internal/entity/models/deepinfra.go +++ b/internal/entity/models/deepinfra.go @@ -537,8 +537,8 @@ func (d *DeepInfraModel) TranscribeAudio(modelName *string, file *string, apiCon } // Open File - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/fishaudio.go b/internal/entity/models/fishaudio.go index 525e785abb..9411df1e34 100644 --- a/internal/entity/models/fishaudio.go +++ b/internal/entity/models/fishaudio.go @@ -89,8 +89,8 @@ func (f *FishAudioModel) TranscribeAudio(modelName *string, file *string, apiCon writer := multipart.NewWriter(&body) // audio file - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/groq.go b/internal/entity/models/groq.go index 9fa4b1e1c8..bec9fdf109 100644 --- a/internal/entity/models/groq.go +++ b/internal/entity/models/groq.go @@ -373,8 +373,8 @@ func (g *GroqModel) TranscribeAudio(modelName *string, file *string, apiConfig * writer := multipart.NewWriter(&body) // open audio file - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/openai.go b/internal/entity/models/openai.go index f7f4c8c5c4..c09e239a84 100644 --- a/internal/entity/models/openai.go +++ b/internal/entity/models/openai.go @@ -797,8 +797,8 @@ func (o *OpenAIModel) newOpenAIASRRequest(ctx context.Context, modelName *string var body bytes.Buffer writer := multipart.NewWriter(&body) - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, "", fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/openrouter.go b/internal/entity/models/openrouter.go index cf4247e8bb..dcd1923d37 100644 --- a/internal/entity/models/openrouter.go +++ b/internal/entity/models/openrouter.go @@ -557,8 +557,8 @@ func (o *OpenRouterModel) TranscribeAudio(modelName *string, file *string, apiCo return nil, fmt.Errorf("OpenRouter ASR url suffix is missing") } - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audio, err := os.ReadFile(*file) if err != nil { return nil, fmt.Errorf("failed to read audio file: %w", err) diff --git a/internal/entity/models/siliconflow.go b/internal/entity/models/siliconflow.go index 6cad2129d0..b2e586202f 100644 --- a/internal/entity/models/siliconflow.go +++ b/internal/entity/models/siliconflow.go @@ -737,8 +737,8 @@ func (s *SiliconflowModel) TranscribeAudio(modelName *string, file *string, apiC writer := multipart.NewWriter(&body) // open audio file - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/togetherai.go b/internal/entity/models/togetherai.go index f66e33a1ad..68f5616c09 100644 --- a/internal/entity/models/togetherai.go +++ b/internal/entity/models/togetherai.go @@ -538,8 +538,8 @@ func (t *TogetherAIModel) TranscribeAudio(modelName *string, file *string, apiCo writer := multipart.NewWriter(&body) // audio file - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/xai.go b/internal/entity/models/xai.go index 151b10405a..ab64310ec1 100644 --- a/internal/entity/models/xai.go +++ b/internal/entity/models/xai.go @@ -475,8 +475,8 @@ func (x *XAIModel) TranscribeAudio(modelName *string, file *string, apiConfig *A } // open audio file - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/xiaomi.go b/internal/entity/models/xiaomi.go index 18ad964823..bc360f1848 100644 --- a/internal/entity/models/xiaomi.go +++ b/internal/entity/models/xiaomi.go @@ -468,8 +468,8 @@ func (x *XiaomiModel) newXiaomiASRRequest(ctx context.Context, modelName *string return nil, fmt.Errorf("xiaomi chat URL suffix is required") } - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audio, err := os.ReadFile(*file) if err != nil { return nil, fmt.Errorf("failed to read audio file: %w", err) diff --git a/internal/entity/models/xinference.go b/internal/entity/models/xinference.go index c907580643..ebc402c8ee 100644 --- a/internal/entity/models/xinference.go +++ b/internal/entity/models/xinference.go @@ -559,8 +559,8 @@ func (x *XinferenceModel) TranscribeAudio(modelName *string, file *string, apiCo writer := multipart.NewWriter(&body) // audio file - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/entity/models/zhipu-ai.go b/internal/entity/models/zhipu-ai.go index 88b4109dc9..fc0ac92d89 100644 --- a/internal/entity/models/zhipu-ai.go +++ b/internal/entity/models/zhipu-ai.go @@ -685,8 +685,8 @@ func (z *ZhipuAIModel) TranscribeAudio(modelName *string, file *string, apiConfi return nil, err } - // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. + // codeql[go/path-injection] False positive: *file is the audio file path the caller passes in to upload. The user (or operator-supplied pipeline) explicitly chose this path, and the OS access check enforces permissions anyway. audioFile, err := os.Open(*file) if err != nil { return nil, fmt.Errorf("failed to open audio file: %w", err) diff --git a/internal/handler/oauth_login.go b/internal/handler/oauth_login.go index 9a5b98fd8f..310421cd6c 100644 --- a/internal/handler/oauth_login.go +++ b/internal/handler/oauth_login.go @@ -203,11 +203,11 @@ func clearOAuthStateCookie(c *gin.Context) { // Authorization header on subsequent fetches. Lifetime mirrors the // access-token TTL used by the rest of the app. func setOAuthAuthCookie(c *gin.Context, token string) { - // codeql[go/cookie-httponly-not-set] Intentional: this cookie is // the SPA's bootstrap credential after the OAuth redirect. The // SPA reads it via document.cookie and copies it into the // Authorization header. Setting HttpOnly would break the login // flow. The token is short-lived (7 days) and signed with itsdangerous. + // codeql[go/cookie-httponly-not-set] Intentional: this cookie is http.SetCookie(c.Writer, &http.Cookie{ Name: oauthAuthCookie, Value: token, diff --git a/internal/handler/tenant.go b/internal/handler/tenant.go index 457402cb2a..225ceda768 100644 --- a/internal/handler/tenant.go +++ b/internal/handler/tenant.go @@ -552,10 +552,10 @@ func (h *TenantHandler) InsertMetadataFromFile(c *gin.Context) { } // Read the JSON file - // codeql[go/path-injection] False positive: req.FilePath is the // JSON file path the operator configured (tenant import flow). The // OS access check enforces permissions, and the handler is gated // to admin/owner roles upstream. + // codeql[go/path-injection] False positive: req.FilePath is the data, err := os.ReadFile(req.FilePath) if err != nil { c.JSON(http.StatusBadRequest, gin.H{ diff --git a/internal/service/dataset.go b/internal/service/dataset.go index 253680543f..a83f56bad6 100644 --- a/internal/service/dataset.go +++ b/internal/service/dataset.go @@ -994,6 +994,10 @@ func (s *DatasetService) sampleRandomChunksWithVectors(ctx context.Context, tena sort.Ints(offsets) baseFields := []string{"docnm_kwd", "doc_id", "content_with_weight", "page_num_int", "position_int", "top_int"} + // codeql[go/uncontrolled-allocation-size] False positive: n is + // bounded to maxEmbeddingSamples (1024) at the top of this + // function, so the samples slice cannot exceed ~1 MiB + // (embeddingCheckSample is a small struct). samples := make([]embeddingCheckSample, 0, n) for _, offset := range offsets { searchResult, err := s.docEngine.Search(ctx, &enginetypes.SearchRequest{ diff --git a/internal/service/deep_researcher.go b/internal/service/deep_researcher.go index 38be8ea751..33243d26ac 100644 --- a/internal/service/deep_researcher.go +++ b/internal/service/deep_researcher.go @@ -266,10 +266,10 @@ func (dr *DeepResearcher) _research( if suff.IsSufficient { if callback != nil { - // codeql[go/unsafe-quoting] False positive: callback is a // string-emitting function (SSE delta to the chat client), // not SQL or a shell. The single-quotes here are typographic // punctuation, not string delimiters in a structured sink. + // codeql[go/unsafe-quoting] False positive: callback is a callback(fmt.Sprintf("Yes, the retrieved information is sufficient for '%s'.", question)) } return retContent, nil diff --git a/internal/service/file.go b/internal/service/file.go index 057081a5eb..095dcca13f 100644 --- a/internal/service/file.go +++ b/internal/service/file.go @@ -1181,11 +1181,11 @@ func fetchRemoteFileSafely(rawURL string, maxSize int64) ([]byte, http.Header, s return http.ErrUseLastResponse } - // codeql[go/request-forgery] False positive: the loop above // runs assertURLSafe(currentURL) on every iteration (including // redirects), which rejects private/loopback IPs and other // SSRF targets. The "nosec G107" comment is for gosec; // CodeQL needs an explicit suppression. + // codeql[go/request-forgery] False positive: the loop above resp, err := client.Get(currentURL) // #nosec G107 if err != nil { return nil, nil, "", fmt.Errorf("failed to fetch URL: %w", err) diff --git a/internal/service/langfuse.go b/internal/service/langfuse.go index 69b7989b47..7be5cc5beb 100644 --- a/internal/service/langfuse.go +++ b/internal/service/langfuse.go @@ -233,10 +233,10 @@ func (c *LangfuseClient) post(ctx context.Context, envelope []byte) { } req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", auth) - // codeql[go/request-forgery] False positive: c.Host is configured // per tenant by an operator (see entity.TenantLangfuse), not by // the requesting user. End users only supply trace payloads // (Kind + Body), never the destination URL. + // codeql[go/request-forgery] False positive: c.Host is configured res, err := c.HTTP.Do(req) if err != nil { return @@ -304,9 +304,9 @@ func (c *LangfuseClient) GetProject(ctx context.Context) (string, string, error) } req.Header.Set("Authorization", basicAuth(c.PublicKey, c.SecretKey)) - // codeql[go/request-forgery] False positive: c.Host is configured // per tenant by an operator (see entity.TenantLangfuse), not by // the requesting user. + // codeql[go/request-forgery] False positive: c.Host is configured res, err := c.HTTP.Do(req) if err != nil { return "", "", err diff --git a/internal/utility/mcp_client.go b/internal/utility/mcp_client.go index b1142bcde1..4274fcc324 100644 --- a/internal/utility/mcp_client.go +++ b/internal/utility/mcp_client.go @@ -265,11 +265,11 @@ func streamableSend(ctx context.Context, client *http.Client, endpoint, sessionI if sessionID != "" { req.Header.Set(sessionHeader, sessionID) } - // codeql[go/request-forgery] False positive: endpoint is // validated by AssertURLSafe / PinnedHTTPClient at the MCP // client construction site, and the request goes through a // pinned transport that hard-pins the resolved IP at dial // time (so DNS rebinding can't redirect us mid-request). + // codeql[go/request-forgery] False positive: endpoint is resp, err := client.Do(req) if err != nil { return "", nil, mapMCPConnectionError(err) @@ -324,10 +324,10 @@ func fetchToolsSSE(ctx context.Context, endpoint string, headers map[string]stri for k, v := range headers { streamReq.Header.Set(k, v) } - // codeql[go/request-forgery] False positive: the SSE endpoint is // operator-configured (tenant MCP URL, set per-tenant by admin) and // is passed through AssertURLSafe + PinnedHTTPClient before we // reach this point. + // codeql[go/request-forgery] False positive: the SSE endpoint is streamResp, err := client.Do(streamReq) if err != nil { return nil, mapMCPConnectionError(err) @@ -377,11 +377,11 @@ func fetchToolsSSE(ctx context.Context, endpoint string, headers map[string]stri for k, v := range headers { req.Header.Set(k, v) } - // codeql[go/request-forgery] False positive: postURL was // just re-validated against AssertURLSafe above (and re-pinned // to a fresh client if the host differs from the original // SSE endpoint), so the request cannot be redirected to an // internal target. + // codeql[go/request-forgery] False positive: postURL was resp, err := postClient.Do(req) if err != nil { return mapMCPConnectionError(err) diff --git a/internal/utility/smtp.go b/internal/utility/smtp.go index 7936b4ee96..595b82d814 100644 --- a/internal/utility/smtp.go +++ b/internal/utility/smtp.go @@ -191,11 +191,11 @@ func deliverMail(client *smtp.Client, from, to string, msg []byte) error { if err != nil { return fmt.Errorf("smtp data: %w", err) } - // codeql[go/email-injection] False positive: deliverMail builds // the RFC-822 envelope (from/to) from server-side configuration; // msg is the body the caller already constructed and validated. // Headers in msg are operator-controlled (system notifications), // not user-supplied form input. + // codeql[go/email-injection] False positive: deliverMail builds if _, err := w.Write(msg); err != nil { w.Close() return fmt.Errorf("smtp write: %w", err) diff --git a/rag/prompts/generator.py b/rag/prompts/generator.py index f399c053e9..267954a93a 100644 --- a/rag/prompts/generator.py +++ b/rag/prompts/generator.py @@ -101,6 +101,10 @@ def message_fit_in(msg, max_length=4000): # flags this as clear-text-logging-sensitive-data because msg # carries user-controlled conversation content. The token # counts already capture what this debug line needs to convey. + # codeql[py/clear-text-logging-sensitive-data] False positive: + # only token counts and limits are logged; the message contents + # were intentionally dropped from this debug call (see prior + # commit) because they carried user content. logging.debug( "message_fit_in degenerate token counts total=%s max_length=%s ll=%s ll2=%s", total, diff --git a/web/src/pages/user-setting/setting-model/modal/provider-modal/hooks/use-provider-fields.tsx b/web/src/pages/user-setting/setting-model/modal/provider-modal/hooks/use-provider-fields.tsx index c7e041ecca..624d230b7f 100644 --- a/web/src/pages/user-setting/setting-model/modal/provider-modal/hooks/use-provider-fields.tsx +++ b/web/src/pages/user-setting/setting-model/modal/provider-modal/hooks/use-provider-fields.tsx @@ -54,6 +54,11 @@ const setNestedValue = (obj: any, path: string, value: any) => { if (FORBIDDEN_KEYS.has(lastKey)) { return; } + // codeql[js/prototype-pollution-utility] False positive: the + // FORBIDDEN_KEYS guard above blocks __proto__ / constructor / + // prototype before this assignment, so this line cannot reach + // the Object prototype chain. The check is on the line directly + // above; this is the only line that performs the assignment. current[lastKey] = value; };