Skip to content

CodeQL Integration

Technical document for security analysts


1. Introduction

mcp-scan integrates CodeQL as a secondary analysis engine to confirm findings and detect complex vulnerabilities that require advanced data flow analysis. CodeQL is especially useful for finding vulnerabilities that require following the flow through multiple functions and files.


2. Integration Architecture

2.1 Component Diagram

+------------------+
|    mcp-scan      |
|  Pattern/Taint   |  <-- Primary detection
+------------------+
        |
        v
+------------------+
|  CodeQL Client   |  <-- internal/codeql/client.go
+------------------+
        |
        v
+------------------+
|   CodeQL CLI     |  <-- External binary
+------------------+
        |
        +---> database create (AST extraction)
        +---> database analyze (query execution)
        |
        v
+------------------+
|  SARIF Results   |
+------------------+
        |
        v
+------------------+
|  Finding Merger  |  <-- Combines with mcp-scan results
+------------------+

2.2 Code Location

Main file: internal/codeql/client.go

// Client wraps the external CodeQL CLI binary and carries the settings that
// its methods apply to each invocation.
type Client struct {
    binaryPath   string        // Path to the codeql binary that every command executes
    timeout      time.Duration // Deadline applied per CLI invocation by CreateDatabase, AnalyzeDatabase and RunQuery
    queriesDir   string        // Custom queries directory; prepended to the default query list when non-empty
    cacheEnabled bool          // Cache databases: when false, ScanDirectory deletes its temporary directory on return
}

3. Requirements

3.1 CodeQL Installation

macOS

# Via Homebrew
brew install codeql

# Or direct download
wget https://github.com/github/codeql-cli-binaries/releases/latest/download/codeql-osx64.zip
unzip codeql-osx64.zip
export PATH=$PATH:$(pwd)/codeql

Linux

wget https://github.com/github/codeql-cli-binaries/releases/latest/download/codeql-linux64.zip
unzip codeql-linux64.zip
export PATH=$PATH:$(pwd)/codeql

Windows

# Download from GitHub releases and add to PATH

3.2 Installation Verification

codeql version

Should show version >= 2.14.0

3.3 Required Query Packs

# Download standard packs
codeql pack download codeql/python-queries
codeql pack download codeql/javascript-queries
codeql pack download codeql/go-queries

4. CodeQL Client

4.1 Configuration

// Config holds the user-supplied settings from which NewClient builds a
// Client. DefaultConfig provides the documented defaults.
type Config struct {
    BinaryPath string        // Path to codeql (empty = search in PATH)
    Timeout    time.Duration // Analysis timeout (default: 30 min)
    QueriesDir string        // Custom queries
    Cache      bool          // Cache DBs (default: true)
}

// DefaultConfig returns the baseline configuration: a 30-minute analysis
// timeout with database caching switched on. BinaryPath and QueriesDir are
// left empty.
func DefaultConfig() Config {
    var cfg Config
    cfg.Timeout = 30 * time.Minute
    cfg.Cache = true
    return cfg
}

4.2 Initialization

// NewClient builds a Client from cfg.
//
// When cfg.BinaryPath is empty the codeql binary is looked up in PATH. The
// chosen binary is then executed once with the "version" argument to prove
// that it runs. A non-positive cfg.Timeout is replaced by the default from
// DefaultConfig: the command methods pass the timeout to context.WithTimeout,
// so a zero value would make every invocation fail immediately with
// "context deadline exceeded".
//
// It returns an error when the binary cannot be found or the version check
// fails.
func NewClient(cfg Config) (*Client, error) {
    binaryPath := cfg.BinaryPath
    if binaryPath == "" {
        // Search in PATH
        path, err := exec.LookPath("codeql")
        if err != nil {
            return nil, fmt.Errorf("codeql not found in PATH: %w", err)
        }
        binaryPath = path
    }

    // Verify it works
    if err := exec.Command(binaryPath, "version").Run(); err != nil {
        return nil, fmt.Errorf("codeql version check failed: %w", err)
    }

    // Config{} built by hand (without DefaultConfig) has a zero timeout.
    timeout := cfg.Timeout
    if timeout <= 0 {
        timeout = DefaultConfig().Timeout
    }

    return &Client{
        binaryPath:   binaryPath,
        timeout:      timeout,
        queriesDir:   cfg.QueriesDir,
        cacheEnabled: cfg.Cache,
    }, nil
}

4.3 Availability Check

// IsAvailable reports whether an executable named "codeql" can be resolved
// through PATH. It does not run the binary.
func IsAvailable() bool {
    if _, err := exec.LookPath("codeql"); err != nil {
        return false
    }
    return true
}

5. Executed Commands

5.1 Get Version

// Version runs "codeql version --format=json" and returns the value of the
// "version" field in the JSON output. If the output is not valid JSON, the
// raw output with surrounding whitespace removed is returned instead. An
// error is returned only when the command itself fails.
func (c *Client) Version(ctx context.Context) (string, error) {
    raw, err := exec.CommandContext(ctx, c.binaryPath, "version", "--format=json").Output()
    if err != nil {
        return "", fmt.Errorf("codeql version failed: %w", err)
    }

    payload := struct {
        Version string `json:"version"`
    }{}
    if decodeErr := json.Unmarshal(raw, &payload); decodeErr == nil {
        return payload.Version, nil
    }

    // Not JSON: hand back whatever the binary printed.
    return strings.TrimSpace(string(raw)), nil
}

Executed command:

codeql version --format=json

5.2 Create Database

// CreateDatabase runs "codeql database create" to build a database for the
// given language at dbPath from the sources under sourcePath, overwriting any
// existing database there. The call is bounded by the client timeout.
//
// sourcePath and dbPath are converted to absolute paths before use. The
// command runs with sourcePath as its working directory, so a relative
// --source-root or dbPath would otherwise be resolved a second time relative
// to that directory (for example "proj" becoming "proj/proj").
//
// On failure the returned error wraps the underlying error and includes the
// CLI's stderr output.
func (c *Client) CreateDatabase(ctx context.Context, sourcePath, dbPath, language string) error {
    ctx, cancel := context.WithTimeout(ctx, c.timeout)
    defer cancel()

    absSource, err := filepath.Abs(sourcePath)
    if err != nil {
        return fmt.Errorf("database create failed: resolving source path %q: %w", sourcePath, err)
    }
    absDB, err := filepath.Abs(dbPath)
    if err != nil {
        return fmt.Errorf("database create failed: resolving database path %q: %w", dbPath, err)
    }

    args := []string{
        "database", "create",
        absDB,
        "--language=" + language,
        "--source-root=" + absSource,
        "--overwrite",
    }

    cmd := exec.CommandContext(ctx, c.binaryPath, args...)
    cmd.Dir = absSource

    var stderr bytes.Buffer
    cmd.Stderr = &stderr

    if err := cmd.Run(); err != nil {
        return fmt.Errorf("database create failed: %w, stderr: %s", err, stderr.String())
    }
    return nil
}

Executed command:

codeql database create /tmp/mcp-scan-codeql-xxx/db \
    --language=python \
    --source-root=/path/to/code \
    --overwrite

Supported languages:

| Language | Value |
|----------|-------|
| Python | python |
| JavaScript | javascript |
| TypeScript | javascript (same extractor) |
| Go | go |

5.3 Analyze Database

// AnalyzeDatabase runs "codeql database analyze" against dbPath and writes a
// SARIF v2.1.0 report, with code snippets included, to outputPath. When no
// queries are given, the list from getDefaultQueries is used. The call is
// bounded by the client timeout.
//
// On failure the returned error wraps the underlying error and includes the
// CLI's stderr output.
func (c *Client) AnalyzeDatabase(ctx context.Context, dbPath, outputPath string, queries ...string) error {
    runCtx, stop := context.WithTimeout(ctx, c.timeout)
    defer stop()

    selected := queries
    if len(selected) == 0 {
        selected = c.getDefaultQueries(dbPath)
    }

    // Options first, then the query/suite specifiers.
    argv := append([]string{
        "database", "analyze",
        dbPath,
        "--format=sarifv2.1.0",
        "--output=" + outputPath,
        "--sarif-add-snippets",
        "--threads=0", // Use all cores
    }, selected...)

    var errOut bytes.Buffer
    proc := exec.CommandContext(runCtx, c.binaryPath, argv...)
    proc.Stderr = &errOut

    if runErr := proc.Run(); runErr != nil {
        return fmt.Errorf("database analyze failed: %w, stderr: %s", runErr, errOut.String())
    }
    return nil
}

Executed command:

codeql database analyze /tmp/mcp-scan-codeql-xxx/db \
    --format=sarifv2.1.0 \
    --output=/tmp/mcp-scan-codeql-xxx/results.sarif \
    --sarif-add-snippets \
    --threads=0 \
    codeql/python-queries:codeql-suites/python-security-extended.qls

5.4 Complete Scan (Combined)

// ScanDirectory performs a complete scan of sourcePath: it creates a CodeQL
// database in a fresh temporary directory, analyzes it with the given queries
// (or the defaults when none are given) and returns the parsed SARIF report.
//
// The temporary directory is kept only when caching is enabled AND the scan
// succeeded. On any failure it is removed, because a partial database or a
// missing report is of no use and would otherwise accumulate on disk.
//
// NOTE(review): the retained directory has a random name and its path is not
// returned, so nothing visible here reuses it on a later scan - confirm how
// the cache is meant to be consumed.
func (c *Client) ScanDirectory(ctx context.Context, sourcePath, language string, queries ...string) (*SARIFReport, error) {
    // Create temporary directory
    tmpDir, err := os.MkdirTemp("", "mcp-scan-codeql-*")
    if err != nil {
        return nil, fmt.Errorf("failed to create temp dir: %w", err)
    }

    // Flipped to true only after a fully successful scan with caching on.
    keep := false
    defer func() {
        if !keep {
            os.RemoveAll(tmpDir)
        }
    }()

    dbPath := filepath.Join(tmpDir, "db")
    sarifPath := filepath.Join(tmpDir, "results.sarif")

    // 1. Create database
    if err := c.CreateDatabase(ctx, sourcePath, dbPath, language); err != nil {
        return nil, err
    }

    // 2. Analyze
    if err := c.AnalyzeDatabase(ctx, dbPath, sarifPath, queries...); err != nil {
        return nil, err
    }

    // 3. Parse results (must happen before the deferred cleanup runs)
    report, err := ParseSARIFFile(sarifPath)
    if err != nil {
        return nil, err
    }

    keep = c.cacheEnabled
    return report, nil
}

5.5 Run Individual Query

// RunQuery runs a single query (or suite) at queryPath against an existing
// database at dbPath and returns the parsed SARIF report. The report is
// written to a temporary directory that is removed before RunQuery returns.
// The call is bounded by the client timeout.
//
// On command failure the returned error wraps the underlying error and
// includes the CLI's stderr output.
func (c *Client) RunQuery(ctx context.Context, dbPath, queryPath string) (*SARIFReport, error) {
    runCtx, stop := context.WithTimeout(ctx, c.timeout)
    defer stop()

    workDir, err := os.MkdirTemp("", "codeql-query-*")
    if err != nil {
        return nil, err
    }
    defer os.RemoveAll(workDir)

    reportPath := filepath.Join(workDir, "results.sarif")

    var errOut bytes.Buffer
    proc := exec.CommandContext(runCtx, c.binaryPath,
        "database", "analyze",
        dbPath,
        queryPath,
        "--format=sarifv2.1.0",
        "--output="+reportPath,
        "--sarif-add-snippets",
    )
    proc.Stderr = &errOut

    if runErr := proc.Run(); runErr != nil {
        return nil, fmt.Errorf("query failed: %w, stderr: %s", runErr, errOut.String())
    }

    // Parsed while the temporary directory still exists; cleanup is deferred.
    return ParseSARIFFile(reportPath)
}

6. Query Suites

6.1 Default Suites

// getDefaultQueries returns the query specifiers to run when the caller gave
// none: the custom queries directory (if configured) followed by the standard
// security-extended suite for the database at dbPath.
//
// A CodeQL database holds exactly one language, so only the suite matching
// that language is returned. The language is read from the database's
// codeql-database.yml. When it cannot be determined, or is not one of the
// languages with a known suite here, all three standard suites are returned.
func (c *Client) getDefaultQueries(dbPath string) []string {
    var queries []string

    // Add custom queries if they exist
    if c.queriesDir != "" {
        queries = append(queries, c.queriesDir)
    }

    const (
        goSuite         = "codeql/go-queries:codeql-suites/go-security-extended.qls"
        pythonSuite     = "codeql/python-queries:codeql-suites/python-security-extended.qls"
        javascriptSuite = "codeql/javascript-queries:codeql-suites/javascript-security-extended.qls"
    )

    // Add the standard security suite for the database language
    switch codeqlDatabaseLanguage(dbPath) {
    case "go":
        return append(queries, goSuite)
    case "python":
        return append(queries, pythonSuite)
    case "javascript":
        return append(queries, javascriptSuite)
    }

    // Language unknown: fall back to every standard suite.
    return append(queries, goSuite, pythonSuite, javascriptSuite)
}

// codeqlDatabaseLanguage returns the lower-cased value of the top-level
// "primaryLanguage" key in <dbPath>/codeql-database.yml, or "" when the file
// cannot be read or does not contain that key.
func codeqlDatabaseLanguage(dbPath string) string {
    data, err := os.ReadFile(filepath.Join(dbPath, "codeql-database.yml"))
    if err != nil {
        return ""
    }

    const key = "primaryLanguage:"
    for _, line := range strings.Split(string(data), "\n") {
        // Only an unindented key is the database-level setting.
        if !strings.HasPrefix(line, key) {
            continue
        }
        value := strings.TrimSpace(strings.TrimPrefix(line, key))
        return strings.ToLower(strings.Trim(value, `"'`))
    }
    return ""
}

6.2 Available Suites

| Language | Suite | Coverage |
|----------|-------|----------|
| Python | python-security-extended.qls | Complete security |
| Python | python-security-and-quality.qls | Security + quality |
| JavaScript | javascript-security-extended.qls | Complete security |
| JavaScript | javascript-security-and-quality.qls | Security + quality |
| Go | go-security-extended.qls | Complete security |
| Go | go-security-and-quality.qls | Security + quality |

6.3 MCP-Relevant Queries

| Query | Language | Detects |
|-------|----------|---------|
| py/command-line-injection | Python | RCE via subprocess |
| py/code-injection | Python | eval/exec with input |
| py/path-injection | Python | Path traversal |
| py/sql-injection | Python | SQLi |
| py/ssrf | Python | SSRF |
| py/clear-text-logging-sensitive-data | Python | Secret logging |
| js/command-line-injection | JS/TS | RCE via child_process |
| js/code-injection | JS/TS | eval with input |
| js/path-injection | JS/TS | Path traversal |
| js/sql-injection | JS/TS | SQLi |
| js/request-forgery | JS/TS | SSRF |

7. SARIF Results Parsing

7.1 SARIF Structure

// SARIFReport is the top-level object of a SARIF file as decoded by
// ParseSARIFFile. Only the fields listed here are decoded.
type SARIFReport struct {
    Schema  string `json:"$schema"` // Value of the "$schema" property
    Version string `json:"version"` // Value of the "version" property
    Runs    []Run  `json:"runs"`    // One entry per element of the "runs" array
}

// Run is one element of a SARIF report's "runs" array: the tool description
// together with the results it reported.
type Run struct {
    Tool    Tool     `json:"tool"`    // Value of the "tool" property
    Results []Result `json:"results"` // Findings reported in this run
}

// Result is a single finding inside a SARIF run.
type Result struct {
    RuleID   string     `json:"ruleId"`   // Identifier of the rule that fired, e.g. "py/command-line-injection"
    Level    string     `json:"level"`    // SARIF level; mapCodeQLLevel recognizes "error", "warning" and "note"
    Message  Message    `json:"message"`  // Message whose Text becomes the finding description
    Locations []Location `json:"locations"` // Reported locations; convertCodeQLResult reads the first one
}

7.2 File Parsing

// ParseSARIFFile reads the file at path and decodes it as JSON into a
// SARIFReport. It returns a wrapped error when the file cannot be read or its
// content is not valid JSON for that structure.
func ParseSARIFFile(path string) (*SARIFReport, error) {
    raw, readErr := os.ReadFile(path)
    if readErr != nil {
        return nil, fmt.Errorf("failed to read SARIF: %w", readErr)
    }

    report := new(SARIFReport)
    if decodeErr := json.Unmarshal(raw, report); decodeErr != nil {
        return nil, fmt.Errorf("failed to parse SARIF: %w", decodeErr)
    }
    return report, nil
}

8. Finding Merging

8.1 Merge Strategy

CodeQL findings are merged with mcp-scan findings:

// MergeFindings combines mcp-scan findings with the results of a CodeQL run.
//
// The returned slice starts with every mcp-scan finding in its original
// order. Each CodeQL result that isDuplicate does not match against the
// findings collected so far is appended, marked as CodeQL-confirmed. Finally,
// every finding for which hasCodeQLConfirmation reports a match is marked as
// CodeQL-confirmed, and its confidence is raised from medium to high.
//
// A nil codeqlReport (for example when CodeQL did not run) is treated as "no
// CodeQL results": a copy of mcpFindings is returned unchanged. The input
// slice is never modified.
func MergeFindings(mcpFindings []types.Finding, codeqlReport *codeql.SARIFReport) []types.Finding {
    // 1. Add mcp-scan findings
    merged := make([]types.Finding, 0, len(mcpFindings))
    merged = append(merged, mcpFindings...)

    if codeqlReport == nil {
        return merged
    }

    // 2. Add CodeQL findings that are not duplicates
    for _, run := range codeqlReport.Runs {
        for _, result := range run.Results {
            finding := convertCodeQLResult(result)

            // Check if it already exists
            if isDuplicate(merged, finding) {
                continue
            }
            // Mark as confirmed by CodeQL
            finding.Evidence.CodeQLConfirmed = true
            merged = append(merged, finding)
        }
    }

    // 3. Mark mcp-scan findings confirmed by CodeQL
    for i := range merged {
        if !hasCodeQLConfirmation(codeqlReport, merged[i]) {
            continue
        }
        merged[i].Evidence.CodeQLConfirmed = true
        // Elevate confidence if it was medium
        if merged[i].Confidence == types.ConfidenceMedium {
            merged[i].Confidence = types.ConfidenceHigh
        }
    }

    return merged
}

8.2 CodeQL Result Conversion

// convertCodeQLResult turns one SARIF result into an mcp-scan finding.
//
// The rule ID is prefixed with "CODEQL-", the SARIF level is mapped to a
// severity by mapCodeQLLevel, the rule is mapped to a vulnerability class by
// mapCodeQLRule, and confidence is always high. Location and snippet come
// from the first entry of result.Locations.
//
// A result without any location yields a finding with an empty Location and
// no snippet instead of panicking. When the region has no end line (SARIF
// allows endLine to be omitted, in which case it equals startLine), EndLine
// is set to StartLine.
func convertCodeQLResult(result codeql.Result) types.Finding {
    finding := types.Finding{
        RuleID:      "CODEQL-" + result.RuleID,
        Severity:    mapCodeQLLevel(result.Level), // Map severity level
        Confidence:  types.ConfidenceHigh,         // CodeQL has high precision
        Class:       mapCodeQLRule(result.RuleID), // Map rule ID to vulnerability class
        Description: result.Message.Text,
        Evidence: types.Evidence{
            CodeQLConfirmed: true,
        },
    }

    // SARIF permits results with no locations; nothing more to fill in.
    if len(result.Locations) == 0 {
        return finding
    }

    loc := result.Locations[0].PhysicalLocation

    endLine := loc.Region.EndLine
    if endLine == 0 {
        // Omitted endLine means the region ends on its start line.
        endLine = loc.Region.StartLine
    }

    finding.Location = types.Location{
        File:      loc.ArtifactLocation.URI,
        StartLine: loc.Region.StartLine,
        EndLine:   endLine,
    }
    finding.Evidence.Snippet = loc.Region.Snippet.Text

    return finding
}

// mapCodeQLLevel translates a SARIF result level into an mcp-scan severity:
// "error" is critical, "warning" is high, "note" is medium, and any other
// value (including the empty string) is info.
func mapCodeQLLevel(level string) types.Severity {
    if level == "error" {
        return types.SeverityCritical
    }
    if level == "warning" {
        return types.SeverityHigh
    }
    if level == "note" {
        return types.SeverityMedium
    }
    return types.SeverityInfo
}

8.3 Duplicate Detection

// isDuplicate reports whether findings already contains an entry equivalent
// to candidate: same file, start lines at most two apart, and the same
// vulnerability class.
func isDuplicate(findings []types.Finding, candidate types.Finding) bool {
    for i := range findings {
        existing := &findings[i]

        // Different file: cannot be the same issue.
        if existing.Location.File != candidate.Location.File {
            continue
        }
        // Too far apart to be the same statement.
        if abs(existing.Location.StartLine-candidate.Location.StartLine) > 2 {
            continue
        }
        // Same vulnerability class
        if existing.Class == candidate.Class {
            return true
        }
    }
    return false
}

9. Configuration

9.1 In .mcp-scan.yaml

codeql:
  enabled: true
  binary_path: ""  # Empty = search in PATH
  timeout: "30m"
  cache: true
  queries_dir: "./custom-queries"  # Optional

  # Additional suites
  extra_suites:
    - "my-custom-suite.qls"

9.2 Environment Variables

# Alternative to config file
export MCP_SCAN_CODEQL_ENABLED=true
export MCP_SCAN_CODEQL_PATH=/path/to/codeql
export MCP_SCAN_CODEQL_TIMEOUT=30m

10. Supported Languages

// SupportedLanguages runs "codeql resolve languages" and returns the language
// identifiers it prints, in output order. The identifier is the first
// whitespace-separated token of each line; blank lines and lines starting
// with "#" are skipped.
//
// If the command fails, the error is wrapped with the name of the failing
// command, matching the other Client methods.
func (c *Client) SupportedLanguages(ctx context.Context) ([]string, error) {
    output, err := exec.CommandContext(ctx, c.binaryPath, "resolve", "languages").Output()
    if err != nil {
        return nil, fmt.Errorf("codeql resolve languages failed: %w", err)
    }

    var languages []string
    for _, line := range strings.Split(string(output), "\n") {
        // Fields ignores surrounding whitespace, so no separate trim is needed.
        fields := strings.Fields(line)
        if len(fields) == 0 || strings.HasPrefix(fields[0], "#") {
            continue
        }
        languages = append(languages, fields[0])
    }
    return languages, nil
}

Executed command:

codeql resolve languages

Typical output:

cpp (cpp-queries)
csharp (csharp-queries)
go (go-queries)
java (java-queries)
javascript (javascript-queries)
python (python-queries)
ruby (ruby-queries)
swift (swift-queries)


11. Advantages and Disadvantages

11.1 CodeQL Advantages

  1. High precision: Very few false positives
  2. Deep analysis: Follows flow between functions/files
  3. Proven queries: Maintained by GitHub Security Lab
  4. SARIF standard: Easy integration
  5. Extensible: Allows custom queries

11.2 Disadvantages

  1. Slow: Can take minutes
  2. Heavy: Requires creating complete database
  3. External dependency: Requires CodeQL CLI installed
  4. Limited languages: Not all languages supported
  5. Not MCP-aware: Doesn't understand MCP surface specifically

11.3 When to Use CodeQL

Recommended:

- Deep analysis for certification
- Confirmation of critical findings
- Large projects with multiple files
- When high confidence is needed

Not recommended:

- Fast CI/CD (use fast mode without CodeQL)
- Small single-file projects
- When the CodeQL CLI is not available


12. Troubleshooting

12.1 CodeQL Not Found

Error:

codeql not found in PATH

Solution:

# Verify installation
which codeql

# Add to PATH
export PATH=$PATH:/path/to/codeql

12.2 DB Creation Fails

Error:

database create failed: No source files found

Possible causes:

1. Empty directory
2. Incorrect language
3. Files excluded by .gitignore

Solution:

# Verify files
find /path/to/code -name "*.py" | wc -l

# Specify correct language
codeql database create db --language=python --source-root=/path/to/code

12.3 Analysis Timeout

Error:

context deadline exceeded

Solution:

# Increase timeout in config
codeql:
  timeout: "60m"

12.4 Queries Not Found

Error:

Could not find pack codeql/python-queries

Solution:

# Download packs
codeql pack download codeql/python-queries


13. Complete Flow Example

13.1 Code to Analyze

# server.py
import subprocess
from mcp import Server

server = Server()

@server.tool()
def run_command(cmd: str):
    """Execute a shell command."""
    result = subprocess.run(cmd, shell=True, capture_output=True)  # Vulnerable on purpose for this example: the tool argument `cmd` reaches a shell unmodified
    return result.stdout.decode()

13.2 Execution

# With CodeQL enabled
mcp-scan scan ./server.py --codeql

13.3 Internal Process

  1. mcp-scan detects (Pattern + Taint):
     - MCP-A001: Tool input flows to command execution
     - Confidence: High

  2. CodeQL analyzes:

    codeql database create /tmp/xxx/db --language=python --source-root=.
    codeql database analyze /tmp/xxx/db --format=sarifv2.1.0 --output=results.sarif python-security-extended.qls
    

  3. CodeQL finds:
     - py/command-line-injection
     - Level: error

  4. Merge:
     - mcp-scan finding marked as CodeQLConfirmed: true
     - Confidence elevated if it was Medium

13.4 Final Output

{
  "findings": [
    {
      "rule_id": "MCP-A001",
      "severity": "critical",
      "confidence": "high",
      "location": {
        "file": "server.py",
        "line": 10
      },
      "description": "Tool input flows to command execution",
      "evidence": {
        "snippet": "subprocess.run(cmd, shell=True, ...)",
        "codeql_confirmed": true
      }
    }
  ],
  "codeql_version": "2.15.1",
  "codeql_queries_run": ["python-security-extended.qls"]
}

Next document: vulnerability-classes.md