CodeQL Integration¶

Technical document for security analysts

1. Introduction¶

mcp-scan integrates CodeQL as a secondary analysis engine to confirm findings and detect complex vulnerabilities that require advanced data flow analysis. CodeQL is especially useful for finding vulnerabilities that require following the flow through multiple functions and files.

2. Integration Architecture¶

2.1 Component Diagram¶

+------------------+
|    mcp-scan      |
|  Pattern/Taint   |  <-- Primary detection
+------------------+
        |
        v
+------------------+
|  CodeQL Client   |  <-- internal/codeql/client.go
+------------------+
        |
        v
+------------------+
|   CodeQL CLI     |  <-- External binary
+------------------+
        |
        +---> database create (AST extraction)
        +---> database analyze (query execution)
        |
        v
+------------------+
|  SARIF Results   |
+------------------+
        |
        v
+------------------+
|  Finding Merger  |  <-- Combines with mcp-scan results
+------------------+

2.2 Code Location¶

Main file: internal/codeql/client.go

// Client holds the settings used to invoke the external CodeQL CLI.
type Client struct {
    // binaryPath is the path to the codeql binary.
    binaryPath string
    // timeout is the analysis timeout.
    timeout time.Duration
    // queriesDir is the custom queries directory.
    queriesDir string
    // cacheEnabled controls whether databases are cached.
    cacheEnabled bool
}

3. Requirements¶

3.1 CodeQL Installation¶

macOS¶

# Via Homebrew
brew install codeql

# Or direct download
wget https://github.com/github/codeql-cli-binaries/releases/latest/download/codeql-osx64.zip
unzip codeql-osx64.zip
export PATH=$PATH:$(pwd)/codeql

Linux¶

wget https://github.com/github/codeql-cli-binaries/releases/latest/download/codeql-linux64.zip
unzip codeql-linux64.zip
export PATH=$PATH:$(pwd)/codeql

Windows¶

# Download from GitHub releases and add to PATH

3.2 Installation Verification¶

codeql version

Should show version >= 2.14.0

3.3 Required Query Packs¶

# Download standard packs
codeql pack download codeql/python-queries
codeql pack download codeql/javascript-queries
codeql pack download codeql/go-queries

4. CodeQL Client¶

4.1 Configuration¶

// Config carries the user-facing settings for the CodeQL client.
type Config struct {
    // BinaryPath is the path to codeql (empty = search in PATH).
    BinaryPath string
    // Timeout is the analysis timeout (default: 30 min).
    Timeout time.Duration
    // QueriesDir points at custom queries.
    QueriesDir string
    // Cache enables database caching (default: true).
    Cache bool
}

// DefaultConfig returns a Config with a 30-minute timeout and database
// caching enabled. BinaryPath and QueriesDir are left empty.
func DefaultConfig() Config {
    var cfg Config
    cfg.Timeout = 30 * time.Minute
    cfg.Cache = true
    return cfg
}

4.2 Initialization¶

// NewClient builds a Client from cfg.
//
// When cfg.BinaryPath is empty the "codeql" executable is looked up in PATH.
// The resolved binary is then run once with "version" to confirm it works.
//
// A non-positive cfg.Timeout is replaced by DefaultConfig().Timeout. Without
// this, a zero-value Config would produce a client whose per-command
// context.WithTimeout deadline has already expired.
//
// It returns an error if the binary cannot be found or the version check
// fails.
func NewClient(cfg Config) (*Client, error) {
    binaryPath := cfg.BinaryPath
    if binaryPath == "" {
        // Search in PATH
        path, err := exec.LookPath("codeql")
        if err != nil {
            return nil, fmt.Errorf("codeql not found in PATH: %w", err)
        }
        binaryPath = path
    }

    // Verify it works
    if err := exec.Command(binaryPath, "version").Run(); err != nil {
        return nil, fmt.Errorf("codeql version check failed: %w", err)
    }

    timeout := cfg.Timeout
    if timeout <= 0 {
        timeout = DefaultConfig().Timeout
    }

    return &Client{
        binaryPath:   binaryPath,
        timeout:      timeout,
        queriesDir:   cfg.QueriesDir,
        cacheEnabled: cfg.Cache,
    }, nil
}

4.3 Availability Check¶

// IsAvailable reports whether an executable named "codeql" can be found
// in PATH.
func IsAvailable() bool {
    if _, err := exec.LookPath("codeql"); err != nil {
        return false
    }
    return true
}

5. Executed Commands¶

5.1 Get Version¶

// Version runs "codeql version --format=json" and returns the reported
// version string.
//
// If the output cannot be decoded as JSON, the whitespace-trimmed raw
// output is returned instead. An error is returned only when the command
// itself fails.
func (c *Client) Version(ctx context.Context) (string, error) {
    raw, err := exec.CommandContext(ctx, c.binaryPath, "version", "--format=json").Output()
    if err != nil {
        return "", fmt.Errorf("codeql version failed: %w", err)
    }

    var payload struct {
        Version string `json:"version"`
    }
    if json.Unmarshal(raw, &payload) == nil {
        return payload.Version, nil
    }

    // Not JSON: fall back to whatever the binary printed.
    return strings.TrimSpace(string(raw)), nil
}

Executed command:

codeql version --format=json

5.2 Create Database¶

// CreateDatabase runs "codeql database create" for the sources under
// sourcePath, writing the database to dbPath (overwriting any existing one).
//
// language is passed verbatim as --language. The command is bounded by the
// client's timeout in addition to ctx.
//
// Both paths are made absolute before the command is built. The process
// runs with its working directory set to the source directory, so a
// relative --source-root or database path would otherwise be resolved
// against that directory instead of the caller's.
//
// On failure the returned error wraps the underlying error and includes the
// CLI's stderr output.
func (c *Client) CreateDatabase(ctx context.Context, sourcePath, dbPath, language string) error {
    absSource, err := filepath.Abs(sourcePath)
    if err != nil {
        return fmt.Errorf("database create failed: resolving source path %q: %w", sourcePath, err)
    }
    absDB, err := filepath.Abs(dbPath)
    if err != nil {
        return fmt.Errorf("database create failed: resolving database path %q: %w", dbPath, err)
    }

    ctx, cancel := context.WithTimeout(ctx, c.timeout)
    defer cancel()

    args := []string{
        "database", "create",
        absDB,
        "--language=" + language,
        "--source-root=" + absSource,
        "--overwrite",
    }

    cmd := exec.CommandContext(ctx, c.binaryPath, args...)
    cmd.Dir = absSource

    var stderr bytes.Buffer
    cmd.Stderr = &stderr

    if err := cmd.Run(); err != nil {
        return fmt.Errorf("database create failed: %w, stderr: %s", err, stderr.String())
    }
    return nil
}

Executed command:

codeql database create /tmp/mcp-scan-codeql-xxx/db \
    --language=python \
    --source-root=/path/to/code \
    --overwrite

Supported languages:

| Language | Value |
|----------|-------|
| Python | python |
| JavaScript | javascript |
| TypeScript | javascript (same extractor) |
| Go | go |

5.3 Analyze Database¶

// AnalyzeDatabase runs "codeql database analyze" against dbPath and writes
// SARIF v2.1.0 results (with snippets) to outputPath.
//
// When no queries are given, the client's default query set is used. The
// command is bounded by the client's timeout in addition to ctx. On failure
// the returned error wraps the underlying error and includes the CLI's
// stderr output.
func (c *Client) AnalyzeDatabase(ctx context.Context, dbPath, outputPath string, queries ...string) error {
    runCtx, cancel := context.WithTimeout(ctx, c.timeout)
    defer cancel()

    selected := queries
    if len(selected) == 0 {
        selected = c.getDefaultQueries(dbPath)
    }

    args := make([]string, 0, 7+len(selected))
    args = append(args,
        "database", "analyze",
        dbPath,
        "--format=sarifv2.1.0",
        "--output="+outputPath,
        "--sarif-add-snippets",
        "--threads=0", // Use all cores
    )
    args = append(args, selected...)

    var errOut bytes.Buffer
    cmd := exec.CommandContext(runCtx, c.binaryPath, args...)
    cmd.Stderr = &errOut

    runErr := cmd.Run()
    if runErr == nil {
        return nil
    }
    return fmt.Errorf("database analyze failed: %w, stderr: %s", runErr, errOut.String())
}

Executed command:

codeql database analyze /tmp/mcp-scan-codeql-xxx/db \
    --format=sarifv2.1.0 \
    --output=/tmp/mcp-scan-codeql-xxx/results.sarif \
    --sarif-add-snippets \
    --threads=0 \
    codeql/python-queries:codeql-suites/python-security-extended.qls

5.4 Complete Scan (Combined)¶

// ScanDirectory performs a complete scan of sourcePath: it creates a
// database in a fresh temporary directory, analyzes it with the given
// queries (or the defaults when none are given), and parses the resulting
// SARIF file.
//
// The temporary directory is kept only when the scan succeeds and caching
// is enabled. On any failure it is removed regardless of the cache setting,
// so failed scans do not leave partial databases behind.
func (c *Client) ScanDirectory(ctx context.Context, sourcePath, language string, queries ...string) (*SARIFReport, error) {
    // Create temporary directory
    tmpDir, err := os.MkdirTemp("", "mcp-scan-codeql-*")
    if err != nil {
        return nil, fmt.Errorf("failed to create temp dir: %w", err)
    }

    keep := false
    defer func() {
        if !keep {
            // Best-effort cleanup; a removal failure must not mask the scan result.
            _ = os.RemoveAll(tmpDir)
        }
    }()

    dbPath := filepath.Join(tmpDir, "db")
    sarifPath := filepath.Join(tmpDir, "results.sarif")

    // 1. Create database
    if err := c.CreateDatabase(ctx, sourcePath, dbPath, language); err != nil {
        return nil, err
    }

    // 2. Analyze
    if err := c.AnalyzeDatabase(ctx, dbPath, sarifPath, queries...); err != nil {
        return nil, err
    }

    // 3. Parse results
    report, err := ParseSARIFFile(sarifPath)
    if err != nil {
        return nil, err
    }

    keep = c.cacheEnabled
    return report, nil
}

5.5 Run Individual Query¶

// RunQuery executes a single query (queryPath) against an existing
// database at dbPath and returns the parsed SARIF report.
//
// Results are written to a temporary directory that is removed before the
// function returns. The command is bounded by the client's timeout in
// addition to ctx.
func (c *Client) RunQuery(ctx context.Context, dbPath, queryPath string) (*SARIFReport, error) {
    runCtx, cancel := context.WithTimeout(ctx, c.timeout)
    defer cancel()

    workDir, err := os.MkdirTemp("", "codeql-query-*")
    if err != nil {
        return nil, err
    }
    defer os.RemoveAll(workDir)

    resultsFile := filepath.Join(workDir, "results.sarif")

    var errOut bytes.Buffer
    cmd := exec.CommandContext(runCtx, c.binaryPath,
        "database", "analyze",
        dbPath,
        queryPath,
        "--format=sarifv2.1.0",
        "--output="+resultsFile,
        "--sarif-add-snippets",
    )
    cmd.Stderr = &errOut

    if runErr := cmd.Run(); runErr != nil {
        return nil, fmt.Errorf("query failed: %w, stderr: %s", runErr, errOut.String())
    }

    // Parse while the temporary directory still exists.
    return ParseSARIFFile(resultsFile)
}

6. Query Suites¶

6.1 Default Suites¶

// getDefaultQueries returns the query arguments used when the caller
// supplies none: the custom queries directory (if configured) followed by
// the standard security-extended suite for the database's language.
//
// The language is read from the database at dbPath. When it cannot be
// determined, or has no known suite, all three standard suites are returned.
func (c *Client) getDefaultQueries(dbPath string) []string {
    queries := []string{}

    // Add custom queries if they exist
    if c.queriesDir != "" {
        queries = append(queries, c.queriesDir)
    }

    // Prefer the single suite matching the database language: suites written
    // for another language do not apply to this database.
    if suite, ok := defaultSuiteFor(databaseLanguage(dbPath)); ok {
        return append(queries, suite)
    }

    // Language unknown: fall back to every standard security suite.
    return append(queries,
        "codeql/go-queries:codeql-suites/go-security-extended.qls",
        "codeql/python-queries:codeql-suites/python-security-extended.qls",
        "codeql/javascript-queries:codeql-suites/javascript-security-extended.qls",
    )
}

// databaseLanguage returns the top-level primaryLanguage value from the
// database's codeql-database.yml, or "" when the file is missing or has no
// such key.
func databaseLanguage(dbPath string) string {
    data, err := os.ReadFile(filepath.Join(dbPath, "codeql-database.yml"))
    if err != nil {
        return ""
    }
    const key = "primaryLanguage:"
    for _, line := range strings.Split(string(data), "\n") {
        if !strings.HasPrefix(line, key) {
            continue
        }
        value := strings.TrimSpace(strings.TrimPrefix(line, key))
        return strings.Trim(value, `"'`)
    }
    return ""
}

// defaultSuiteFor maps a CodeQL language identifier to its standard
// security-extended suite. The boolean is false for unsupported languages.
func defaultSuiteFor(language string) (string, bool) {
    switch language {
    case "go":
        return "codeql/go-queries:codeql-suites/go-security-extended.qls", true
    case "python":
        return "codeql/python-queries:codeql-suites/python-security-extended.qls", true
    case "javascript":
        return "codeql/javascript-queries:codeql-suites/javascript-security-extended.qls", true
    }
    return "", false
}

6.2 Available Suites¶

Language	Suite	Coverage
Python	`python-security-extended.qls`	Complete security
Python	`python-security-and-quality.qls`	Security + quality
JavaScript	`javascript-security-extended.qls`	Complete security
JavaScript	`javascript-security-and-quality.qls`	Security + quality
Go	`go-security-extended.qls`	Complete security
Go	`go-security-and-quality.qls`	Security + quality

6.3 MCP-Relevant Queries¶

Query	Language	Detects
`py/command-line-injection`	Python	RCE via subprocess
`py/code-injection`	Python	eval/exec with input
`py/path-injection`	Python	Path traversal
`py/sql-injection`	Python	SQLi
`py/ssrf`	Python	SSRF
`py/clear-text-logging-sensitive-data`	Python	Secret logging
`js/command-line-injection`	JS/TS	RCE via child_process
`js/code-injection`	JS/TS	eval with input
`js/path-injection`	JS/TS	Path traversal
`js/sql-injection`	JS/TS	SQLi
`js/request-forgery`	JS/TS	SSRF

7. SARIF Results Parsing¶

7.1 SARIF Structure¶

// SARIFReport is the top-level object of a SARIF document.
type SARIFReport struct {
    // Schema is decoded from the "$schema" key.
    Schema string `json:"$schema"`
    // Version is decoded from the "version" key.
    Version string `json:"version"`
    // Runs is decoded from the "runs" array.
    Runs []Run `json:"runs"`
}

// Run is one entry of a SARIF report's "runs" array.
type Run struct {
    // Tool is decoded from the "tool" key.
    Tool Tool `json:"tool"`
    // Results is decoded from the "results" array.
    Results []Result `json:"results"`
}

// Result is one entry of a run's "results" array.
type Result struct {
    // RuleID is decoded from the "ruleId" key.
    RuleID string `json:"ruleId"`
    // Level is decoded from the "level" key.
    Level string `json:"level"`
    // Message is decoded from the "message" key.
    Message Message `json:"message"`
    // Locations is decoded from the "locations" array.
    Locations []Location `json:"locations"`
}

7.2 File Parsing¶

// ParseSARIFFile reads the file at path and decodes it as a SARIF report.
// It returns a wrapped error if the file cannot be read or is not valid
// JSON for the report structure.
func ParseSARIFFile(path string) (*SARIFReport, error) {
    raw, readErr := os.ReadFile(path)
    if readErr != nil {
        return nil, fmt.Errorf("failed to read SARIF: %w", readErr)
    }

    report := new(SARIFReport)
    if parseErr := json.Unmarshal(raw, report); parseErr != nil {
        return nil, fmt.Errorf("failed to parse SARIF: %w", parseErr)
    }
    return report, nil
}

8. Finding Merging¶

8.1 Merge Strategy¶

CodeQL findings are merged with mcp-scan findings:

// MergeFindings combines mcp-scan findings with the results of a CodeQL
// SARIF report.
//
// The returned slice starts with a copy of mcpFindings, followed by every
// CodeQL result that isDuplicate does not consider a duplicate of an
// already-merged finding. Findings for which hasCodeQLConfirmation reports
// true are then marked as CodeQL-confirmed, and a medium confidence is
// raised to high.
//
// A nil codeqlReport is treated as "no CodeQL results": the mcp-scan
// findings are returned unchanged.
func MergeFindings(mcpFindings []types.Finding, codeqlReport *codeql.SARIFReport) []types.Finding {
    // 1. Add mcp-scan findings
    merged := make([]types.Finding, 0, len(mcpFindings))
    merged = append(merged, mcpFindings...)

    if codeqlReport == nil {
        return merged
    }

    // 2. Add CodeQL findings that are not duplicates
    for _, run := range codeqlReport.Runs {
        for _, result := range run.Results {
            finding := convertCodeQLResult(result)
            if isDuplicate(merged, finding) {
                continue
            }
            // Mark as confirmed by CodeQL
            finding.Evidence.CodeQLConfirmed = true
            merged = append(merged, finding)
        }
    }

    // 3. Mark mcp-scan findings confirmed by CodeQL
    for i := range merged {
        if !hasCodeQLConfirmation(codeqlReport, merged[i]) {
            continue
        }
        merged[i].Evidence.CodeQLConfirmed = true
        // Elevate confidence if it was medium
        if merged[i].Confidence == types.ConfidenceMedium {
            merged[i].Confidence = types.ConfidenceHigh
        }
    }

    return merged
}

8.2 CodeQL Result Conversion¶

// convertCodeQLResult turns a single SARIF result into an mcp-scan Finding.
//
// The rule ID is prefixed with "CODEQL-", the SARIF level is mapped to a
// severity via mapCodeQLLevel, and the rule ID to a vulnerability class via
// mapCodeQLRule. Only the first location of the result is used.
//
// A result without any location yields a Finding with an empty Location
// instead of panicking. When the region carries no end line (0), the start
// line is used, since SARIF allows endLine to be omitted for single-line
// regions.
func convertCodeQLResult(result codeql.Result) types.Finding {
    // NOTE(review): assumes Location.PhysicalLocation and its nested fields are
    // plain struct values (not pointers), as the direct field access implies — confirm.
    var first codeql.Location
    if len(result.Locations) > 0 {
        first = result.Locations[0]
    }
    loc := first.PhysicalLocation

    endLine := loc.Region.EndLine
    if endLine == 0 {
        endLine = loc.Region.StartLine
    }

    // Map severity level
    severity := mapCodeQLLevel(result.Level)

    // Map rule ID to vulnerability class
    class := mapCodeQLRule(result.RuleID)

    return types.Finding{
        RuleID:     "CODEQL-" + result.RuleID,
        Severity:   severity,
        Confidence: types.ConfidenceHigh, // CodeQL has high precision
        Class:      class,
        Location: types.Location{
            File:      loc.ArtifactLocation.URI,
            StartLine: loc.Region.StartLine,
            EndLine:   endLine,
        },
        Description: result.Message.Text,
        Evidence: types.Evidence{
            Snippet:         loc.Region.Snippet.Text,
            CodeQLConfirmed: true,
        },
    }
}

// mapCodeQLLevel translates a SARIF result level into an mcp-scan severity:
// "error" is critical, "warning" is high, "note" is medium, and anything
// else is informational.
func mapCodeQLLevel(level string) types.Severity {
    severity := types.SeverityInfo
    switch level {
    case "error":
        severity = types.SeverityCritical
    case "warning":
        severity = types.SeverityHigh
    case "note":
        severity = types.SeverityMedium
    }
    return severity
}

8.3 Duplicate Detection¶

// isDuplicate reports whether findings already contains an entry that
// matches candidate: same file, start lines at most 2 apart, and the same
// vulnerability class.
func isDuplicate(findings []types.Finding, candidate types.Finding) bool {
    for i := range findings {
        existing := findings[i]
        if existing.Location.File != candidate.Location.File {
            continue
        }
        if abs(existing.Location.StartLine-candidate.Location.StartLine) > 2 {
            continue
        }
        if existing.Class == candidate.Class {
            return true
        }
    }
    return false
}

9. Configuration¶

9.1 In .mcp-scan.yaml¶

codeql:
  enabled: true
  binary_path: ""  # Empty = search in PATH
  timeout: "30m"
  cache: true
  queries_dir: "./custom-queries"  # Optional

  # Additional suites
  extra_suites:
    - "my-custom-suite.qls"

9.2 Environment Variables¶

# Alternative to config file
export MCP_SCAN_CODEQL_ENABLED=true
export MCP_SCAN_CODEQL_PATH=/path/to/codeql
export MCP_SCAN_CODEQL_TIMEOUT=30m

10. Supported Languages¶

// SupportedLanguages runs "codeql resolve languages" and returns the first
// whitespace-separated token of every non-blank output line that does not
// start with "#".
func (c *Client) SupportedLanguages(ctx context.Context) ([]string, error) {
    raw, err := exec.CommandContext(ctx, c.binaryPath, "resolve", "languages").Output()
    if err != nil {
        return nil, err
    }

    var names []string
    for _, entry := range strings.Split(string(raw), "\n") {
        fields := strings.Fields(entry)
        // Skip blank lines and comment lines.
        if len(fields) == 0 || strings.HasPrefix(fields[0], "#") {
            continue
        }
        names = append(names, fields[0])
    }
    return names, nil
}

Executed command:

codeql resolve languages

Typical output:

cpp (cpp-queries)
csharp (csharp-queries)
go (go-queries)
java (java-queries)
javascript (javascript-queries)
python (python-queries)
ruby (ruby-queries)
swift (swift-queries)

11. Advantages and Disadvantages¶

11.1 CodeQL Advantages¶

High precision: Very few false positives
Deep analysis: Follows flow between functions/files
Proven queries: Maintained by GitHub Security Lab
SARIF standard: Easy integration
Extensible: Allows custom queries

11.2 Disadvantages¶

Slow: Can take minutes
Heavy: Requires creating complete database
External dependency: Requires CodeQL CLI installed
Limited languages: Not all languages supported
Not MCP-aware: Doesn't understand MCP surface specifically

11.3 When to Use CodeQL¶

Recommended:

- Deep analysis for certification
- Confirmation of critical findings
- Large projects with multiple files
- When high confidence is needed

Not recommended:

- Fast CI/CD (use fast mode without CodeQL)
- Small single-file projects
- When the CodeQL CLI is not available

12. Troubleshooting¶

12.1 CodeQL Not Found¶

Error:

codeql not found in PATH

Solution:

# Verify installation
which codeql

# Add to PATH
export PATH=$PATH:/path/to/codeql

12.2 DB Creation Fails¶

Error:

database create failed: No source files found

Possible causes:

1. Empty directory
2. Incorrect language
3. Files excluded by .gitignore

Solution:

# Verify files
find /path/to/code -name "*.py" | wc -l

# Specify correct language
codeql database create db --language=python --source-root=/path/to/code

12.3 Analysis Timeout¶

Error:

context deadline exceeded

Solution:

# Increase timeout in config
codeql:
  timeout: "60m"

12.4 Queries Not Found¶

Error:

Could not find pack codeql/python-queries

Solution:

# Download packs
codeql pack download codeql/python-queries

13. Complete Flow Example¶

13.1 Code to Analyze¶

# server.py
import subprocess
from mcp import Server

server = Server()

@server.tool()
def run_command(cmd: str):
    """Execute a shell command."""
    result = subprocess.run(cmd, shell=True, capture_output=True)  # Vulnerable on purpose: tool input `cmd` is passed to a shell unsanitized
    return result.stdout.decode()

13.2 Execution¶

# With CodeQL enabled
mcp-scan scan ./server.py --codeql

13.3 Internal Process¶

mcp-scan detects (Pattern + Taint):
MCP-A001: Tool input flows to command execution
Confidence: High

CodeQL analyzes:

codeql database create /tmp/xxx/db --language=python --source-root=.
codeql database analyze /tmp/xxx/db --format=sarifv2.1.0 --output=results.sarif codeql/python-queries:codeql-suites/python-security-extended.qls

CodeQL finds:
py/command-line-injection
Level: error

Merge:
mcp-scan finding marked as CodeQLConfirmed: true
Confidence elevated if it was Medium

13.4 Final Output¶

{
  "findings": [
    {
      "rule_id": "MCP-A001",
      "severity": "critical",
      "confidence": "high",
      "location": {
        "file": "server.py",
        "line": 10
      },
      "description": "Tool input flows to command execution",
      "evidence": {
        "snippet": "subprocess.run(cmd, shell=True, ...)",
        "codeql_confirmed": true
      }
    }
  ],
  "codeql_version": "2.15.1",
  "codeql_queries_run": ["python-security-extended.qls"]
}

Next document: vulnerability-classes.md