CodeQL Integration Guide

Overview

The CodeQL integration enables deep semantic security analysis using GitHub's CodeQL engine. CodeQL provides advanced dataflow and taint tracking capabilities that complement mcp-scan's built-in analysis.

Architecture

┌─────────────────────────────────────────────────────────────────┐
│                        mcp-scan                                  │
│  ┌─────────────────────────────────────────────────────────────┐│
│  │                   CodeQL Analyzer                            ││
│  │  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐           ││
│  │  │   Client    │ │ Integration │ │   SARIF     │           ││
│  │  │  (CLI wrap) │ │  (convert)  │ │  Parser     │           ││
│  │  └──────┬──────┘ └──────┬──────┘ └──────┬──────┘           ││
│  └─────────┼───────────────┼───────────────┼───────────────────┘│
└────────────┼───────────────┼───────────────┼────────────────────┘
             │               │               │
    ┌────────▼────────┐      │      ┌────────▼────────┐
    │   CodeQL CLI    │      │      │  SARIF 2.1.0    │
    │   (subprocess)  │──────┴──────│    Results      │
    └─────────────────┘             └─────────────────┘
    ┌────────▼────────┐
    │ CodeQL Database │
    │   (temp dir)    │
    └─────────────────┘

Components

CodeQL Client (internal/codeql/client.go)

Wraps the CodeQL CLI for database creation and analysis.

// Check if CodeQL is available
if !codeql.IsAvailable() {
    log.Println("CodeQL not found in PATH")
    return
}

// Create client with default config
client, err := codeql.NewClient(codeql.DefaultConfig())
if err != nil {
    log.Fatal(err)
}

// Get CodeQL version (the error is checked rather than discarded, so a
// broken installation is reported instead of printing an empty version)
version, err := client.Version(ctx)
if err != nil {
    log.Fatal(err)
}
fmt.Printf("CodeQL version: %s\n", version)

Client Configuration

// Config carries the settings used to construct a CodeQL client.
type Config struct {
    // BinaryPath is the path to the codeql binary (empty = search in PATH).
    BinaryPath string
    // Timeout is the analysis timeout (default: 30 minutes).
    Timeout time.Duration
    // QueriesDir is the custom queries directory.
    QueriesDir string
    // Cache enables database caching (default: true).
    Cache bool
}

// DefaultConfig returns the default configuration: a 30-minute timeout with
// database caching enabled. BinaryPath and QueriesDir are left empty.
func DefaultConfig() Config {
    var cfg Config
    cfg.Timeout = 30 * time.Minute
    cfg.Cache = true
    return cfg
}

SARIF Parser (internal/codeql/sarif.go)

Parses CodeQL's SARIF 2.1.0 output format.

// Parse SARIF file
report, err := codeql.ParseSARIFFile("/path/to/results.sarif")
if err != nil {
    log.Fatal(err)
}

// Iterate results
for _, result := range report.GetResults() {
    fmt.Printf("Rule: %s\n", result.RuleID)
    fmt.Printf("Message: %s\n", result.Message.Text)
    for _, loc := range result.Locations {
        fmt.Printf("Location: %s:%d\n",
            loc.PhysicalLocation.ArtifactLocation.URI,
            loc.PhysicalLocation.Region.StartLine)
    }
}

SARIF Types

// SARIFReport is the top-level SARIF document. Its fields are decoded from
// the "$schema", "version" and "runs" JSON keys; each entry of Runs holds the
// tool description and results of one run.
type SARIFReport struct {
    Schema  string `json:"$schema"`
    Version string `json:"version"`
    Runs    []Run  `json:"runs"`
}

// Run contains tool info and results: it pairs the tool description (JSON
// key "tool") with the list of findings reported under "results".
type Run struct {
    Tool    Tool     `json:"tool"`
    Results []Result `json:"results"`
}

// Result is an individual finding. It records the rule that fired (RuleID),
// its level, the message, and the locations it applies to. CodeFlows holds
// the data flow paths attached to the finding, if any; the field is omitted
// from encoded JSON when empty.
type Result struct {
    RuleID    string     `json:"ruleId"`
    Level     string     `json:"level"`
    Message   Message    `json:"message"`
    Locations []Location `json:"locations"`
    CodeFlows []CodeFlow `json:"codeFlows,omitempty"`
}

// CodeFlow represents a data flow path
type CodeFlow struct {
    ThreadFlows []ThreadFlow `json:"threadFlows"`
}

// ThreadFlowLocation is a step in a flow
type ThreadFlowLocation struct {
    Location Location `json:"location"`
    Kinds    []string `json:"kinds,omitempty"`
}

CodeQL Analyzer (internal/codeql/integration.go)

Integrates CodeQL with mcp-scan's finding format.

// Create analyzer
cfg := codeql.AnalyzerConfig{
    Languages:   []string{"python", "javascript", "go"},
    MinSeverity: 5.0, // CVSS score threshold
}

analyzer, err := codeql.NewAnalyzer(cfg)
if err != nil {
    log.Fatal(err)
}

// Run analysis
findings, err := analyzer.Analyze(ctx, "/path/to/source")
if err != nil {
    log.Fatal(err)
}

// Process findings
for _, f := range findings {
    fmt.Printf("[%s] %s: %s\n", f.Severity, f.RuleID, f.Description)
}

CodeQL CLI Operations

Database Creation

// Create database for Python project
err := client.CreateDatabase(ctx,
    "/path/to/source",  // Source directory
    "/tmp/codeql-db",   // Database path
    "python")           // Language

if err != nil {
    log.Fatalf("Database creation failed: %v", err)
}

CLI equivalent:

codeql database create /tmp/codeql-db \
    --language=python \
    --source-root=/path/to/source \
    --overwrite

Database Analysis

// Analyze with security queries
err := client.AnalyzeDatabase(ctx,
    "/tmp/codeql-db",      // Database path
    "/tmp/results.sarif",  // Output path
    "codeql/python-queries:codeql-suites/python-security-extended.qls")

if err != nil {
    log.Fatalf("Analysis failed: %v", err)
}

CLI equivalent:

codeql database analyze /tmp/codeql-db \
    codeql/python-queries:codeql-suites/python-security-extended.qls \
    --format=sarifv2.1.0 \
    --output=/tmp/results.sarif \
    --sarif-add-snippets \
    --threads=0

Complete Scan

// One-step scan (create DB + analyze)
report, err := client.ScanDirectory(ctx,
    "/path/to/source",
    "python",
    "codeql/python-queries:codeql-suites/python-security-extended.qls")

if err != nil {
    log.Fatal(err)
}

// Process results
for _, result := range report.GetResults() {
    fmt.Printf("Found: %s\n", result.RuleID)
}

Custom MCP Queries

Query Directory Structure

resources/codeql/
├── queries/
│   ├── mcp-command-injection.ql
│   ├── mcp-path-traversal.ql
│   ├── mcp-ssrf.ql
│   └── mcp-sql-injection.ql
├── models/
│   ├── mcp-sources.yml
│   └── mcp-sinks.yml
└── qlpack.yml

MCP Command Injection Query

/**
 * @name MCP Tool Input to Command Execution
 * @description User input from MCP tool flows to command execution
 * @kind path-problem
 * @problem.severity error
 * @security-severity 9.8
 * @precision high
 * @id mcp/tool-input-to-exec
 * @tags security
 *       mcp
 *       rce
 */

import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking

/**
 * A data-flow node whose expression is an argument of a function that
 * carries a decorator named `tool`.
 *
 * The decorator is matched by name only; the object it is accessed on is
 * not constrained.
 */
class MCPToolSource extends DataFlow::Node {
  MCPToolSource() {
    exists(Function f, Decorator d |
      d.getName() = "tool" and
      f.getADecorator() = d and
      this.asExpr() = f.getArg(_)
    )
  }
}

/**
 * A data-flow node for the first argument (index 0) of a call whose callee
 * is an attribute access named `system`, `popen`, `run` or `call`.
 *
 * Only the attribute name is checked; the object the attribute is read
 * from is not constrained.
 */
class CommandExecSink extends DataFlow::Node {
  CommandExecSink() {
    exists(Call c |
      c.getFunc().(Attribute).getName() in ["system", "popen", "run", "call"] and
      this.asExpr() = c.getArg(0)
    )
  }
}

/**
 * Flow configuration for this query: sources are `MCPToolSource` nodes and
 * sinks are `CommandExecSink` nodes.
 */
module MCPRCEConfig implements DataFlow::ConfigSig {
  predicate isSource(DataFlow::Node source) { source instanceof MCPToolSource }
  predicate isSink(DataFlow::Node sink) { sink instanceof CommandExecSink }
}

// Taint-tracking instantiation of the configuration above.
module MCPRCEFlow = TaintTracking::Global<MCPRCEConfig>;

// A `@kind path-problem` query must bring the flow module's path graph into
// scope; it supplies the `edges` relation used to render source-to-sink paths.
import MCPRCEFlow::PathGraph

from MCPRCEFlow::PathNode source, MCPRCEFlow::PathNode sink
where MCPRCEFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "MCP tool input flows to command execution"

MCP Source Model (YAML)

# models/mcp-sources.yml
# Each sourceModel row for codeql/python-all is a (type, path, kind) tuple.
# "Parameter[0..]" selects every positional parameter from index 0 upwards.
extensions:
  - addsTo:
      pack: codeql/python-all
      extensible: sourceModel
    data:
      # FastMCP tool parameters (MCP tool input)
      - ["fastmcp", "Member[tool].Parameter[0..]", "remote"]
      # MCP SDK tool parameters (MCP tool input)
      - ["mcp.server", "Member[Server].Member[tool].Parameter[0..]", "remote"]

MCP Sink Model (YAML)

# models/mcp-sinks.yml
extensions:
  - addsTo:
      pack: codeql/python-all
      extensible: sinkModel
    data:
      # Command execution sinks
      - ["os", "Member[system].Argument[0]", "command-injection"]
      - ["subprocess", "Member[run].Argument[0]", "command-injection"]
      - ["subprocess", "Member[call].Argument[0]", "command-injection"]
      - ["subprocess", "Member[Popen].Argument[0]", "command-injection"]

Pattern Detector Integration

CodeQL Detector

// Create CodeQL detector
cfg := pattern.CodeQLDetectorConfig{
    Languages:   []string{"python", "javascript"},
    MinSeverity: 5.0,
    QueriesDir:  "resources/codeql/queries",
}

detector, err := pattern.NewCodeQLDetector(cfg)
if err != nil {
    // Stop here: the detector must not be used after a failed construction
    // (it may be nil).
    log.Printf("CodeQL not available: %v", err)
    return
}

// Check if enabled
if detector.IsEnabled() {
    fmt.Println("CodeQL analysis available")
}

Project-Level Scanning

// Create project scanner
scanner, err := pattern.NewCodeQLProjectScanner(cfg)
if err != nil {
    log.Fatal(err)
}

// Run full project scan
result, err := scanner.Scan(ctx, "/path/to/project")
if err != nil {
    log.Fatal(err)
}

// Check results
if result.Available && result.Success {
    fmt.Printf("Found %d issues\n", result.Stats.TotalFindings)
    for _, match := range result.Matches {
        fmt.Printf("[%s] %s\n", match.Severity, match.Description)
    }
}

Merging with MCP Findings

// Run both analyses
mcpFindings := mcpScanner.Scan(files)
codeqlMatches, err := codeqlDetector.DetectProject(ctx, sourcePath)
if err != nil {
    // Report the failure instead of discarding it; the merge below still
    // runs with whatever matches were returned.
    log.Printf("CodeQL analysis failed: %v", err)
}

// Merge findings (CodeQL confirms MCP findings)
merged := pattern.MergeMCPFindings(mcpFindings, codeqlMatches)

for _, f := range merged {
    confidence := "medium"
    if f.Evidence.CodeQLConfirmed {
        confidence = "high (CodeQL confirmed)"
    }
    fmt.Printf("[%s] %s - %s\n", f.Severity, f.Description, confidence)
}

Severity Mapping

CVSS to mcp-scan Severity

| CVSS Score | mcp-scan Severity |
|------------|-------------------|
| 9.0 - 10.0 | Critical          |
| 7.0 - 8.9  | High              |
| 4.0 - 6.9  | Medium            |
| 0.1 - 3.9  | Low               |
| 0.0        | Info              |

Precision to Confidence

| CodeQL Precision | mcp-scan Confidence |
|------------------|---------------------|
| very-high        | High                |
| high             | High                |
| medium           | Medium              |
| low              | Low                 |

Vulnerability Class Mapping

// mapClass maps a CodeQL rule ID onto an mcp-scan vulnerability class.
//
// The rule ID is lower-cased and tested for known substrings from top to
// bottom; the first matching case wins. IDs that match nothing map to
// types.ClassUnknown.
func mapClass(ruleID string) types.VulnClass {
    ruleID = strings.ToLower(ruleID)

    switch {
    // CodeQL's standard Python and JavaScript packs name this rule
    // "command-line-injection" (e.g. py/command-line-injection), so both
    // spellings are accepted.
    case strings.Contains(ruleID, "command-injection"),
        strings.Contains(ruleID, "command-line-injection"):
        return types.ClassA // RCE
    case strings.Contains(ruleID, "path-injection"):
        return types.ClassB // Filesystem
    case strings.Contains(ruleID, "ssrf"):
        return types.ClassC // Network
    case strings.Contains(ruleID, "sql-injection"):
        return types.ClassD // SQLi
    case strings.Contains(ruleID, "hardcoded"):
        return types.ClassE // Secrets
    case strings.Contains(ruleID, "jwt"):
        return types.ClassF // Auth
    case strings.Contains(ruleID, "prompt"):
        return types.ClassG // Injection
    default:
        return types.ClassUnknown
    }
}

Code Flow Conversion

CodeQL provides detailed code flow information:

// convertCodeFlow flattens a SARIF code flow into mcp-scan trace steps.
//
// Every location of every thread flow is visited in order and turned into one
// step: the artifact URI is joined onto sourcePath, the region's start line
// and column are copied, and the location kinds are joined with ", " to form
// the action. A step's context is set only when the location carries a
// message. The result is nil when the flow contains no locations.
func convertCodeFlow(flow CodeFlow, sourcePath string) []types.TraceStep {
    var trace []types.TraceStep

    for _, tf := range flow.ThreadFlows {
        for _, tfl := range tf.Locations {
            phys := tfl.Location.PhysicalLocation

            var step types.TraceStep
            step.Location = types.Location{
                File:      filepath.Join(sourcePath, phys.ArtifactLocation.URI),
                StartLine: phys.Region.StartLine,
                StartCol:  phys.Region.StartColumn,
            }
            step.Action = strings.Join(tfl.Kinds, ", ")
            if msg := tfl.Location.Message; msg != nil {
                step.Context = msg.Text
            }

            trace = append(trace, step)
        }
    }

    return trace
}

Performance Considerations

Database Creation Time

| Language   | Lines of Code | Creation Time |
|------------|---------------|---------------|
| Python     | 10K           | ~30s          |
| Python     | 100K          | ~3min         |
| JavaScript | 10K           | ~45s          |
| JavaScript | 100K          | ~5min         |

Analysis Time

| Query Suite          | Time (100K LoC) |
|----------------------|-----------------|
| security-extended    | 2-5 min         |
| security-and-quality | 5-10 min        |
| full suite           | 10-30 min       |

Database Caching

Enable caching to reuse databases:

// Start from the defaults so the 30-minute timeout is kept; a bare
// codeql.Config{Cache: true} literal would leave Timeout at its zero value.
cfg := codeql.DefaultConfig()
cfg.Cache = true // Keep database after analysis (already the default)

client, err := codeql.NewClient(cfg)
if err != nil {
    log.Fatal(err)
}
// First scan creates database
if _, err := client.ScanDirectory(ctx, source, "python"); err != nil {
    log.Fatal(err)
}
// Subsequent scans reuse database if source unchanged

Error Handling

CodeQL Not Available

if !codeql.IsAvailable() {
    log.Println("CodeQL CLI not found")
    log.Println("Install from: https://github.com/github/codeql-cli-binaries")
    return
}

Database Creation Failure

err := client.CreateDatabase(ctx, source, dbPath, language)
if err != nil {
    if strings.Contains(err.Error(), "No supported build system") {
        log.Println("Project structure not recognized")
        log.Println("Ensure proper build configuration exists")
    }
    return err
}

Analysis Timeout

ctx, cancel := context.WithTimeout(ctx, 30*time.Minute)
defer cancel()

report, err := client.ScanDirectory(ctx, source, language)
if errors.Is(err, context.DeadlineExceeded) {
    log.Println("Analysis timed out - try reducing scope")
}

Installation

CodeQL CLI

macOS:

brew install codeql

Linux:

# Download from GitHub releases
wget https://github.com/github/codeql-cli-binaries/releases/latest/download/codeql-linux64.zip
unzip codeql-linux64.zip
export PATH="$PATH:$(pwd)/codeql"

Verify installation:

codeql version

Query Packs

# Install standard query packs
codeql pack download codeql/python-queries
codeql pack download codeql/javascript-queries
codeql pack download codeql/go-queries

CLI Usage

# Scan with CodeQL enabled
mcp-scan scan /path/to/project --codeql

# Specify languages
mcp-scan scan /path/to/project --codeql --languages python,javascript

# Custom queries
mcp-scan scan /path/to/project --codeql --codeql-queries ./custom-queries/

# Minimum severity
mcp-scan scan /path/to/project --codeql --min-severity 7.0

API Reference

Client Methods

| Method | Parameters | Returns | Description |
|--------|------------|---------|-------------|
| `NewClient` | `cfg Config` | `*Client, error` | Create client |
| `IsAvailable` | - | `bool` | Check if CodeQL installed |
| `Version` | `ctx` | `string, error` | Get CodeQL version |
| `CreateDatabase` | `ctx, source, dbPath, lang` | `error` | Create database |
| `AnalyzeDatabase` | `ctx, dbPath, output, queries...` | `error` | Run queries |
| `ScanDirectory` | `ctx, source, lang, queries...` | `*SARIFReport, error` | Complete scan |
| `RunQuery` | `ctx, dbPath, queryPath` | `*SARIFReport, error` | Run single query |
| `SupportedLanguages` | `ctx` | `[]string, error` | List languages |

Analyzer Methods

| Method | Parameters | Returns | Description |
|--------|------------|---------|-------------|
| `NewAnalyzer` | `cfg AnalyzerConfig` | `*Analyzer, error` | Create analyzer |
| `Analyze` | `ctx, sourcePath` | `[]types.Finding, error` | Run full analysis |

SARIF Methods

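| Method | Parameters | Returns | Description |
|--------|------------|---------|-------------|
| `ParseSARIFFile` | `path` | `*SARIFReport, error` | Parse SARIF file |
| `ParseSARIF` | `data []byte` | `*SARIFReport, error` | Parse SARIF data |
| `GetResults` | - | `[]Result` | Get all results |
| `GetRules` | - | `[]Rule` | Get all rules |
| `FindRule` | `id` | `*Rule` | Find rule by ID |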

See Also