CodeQL Integration¶
Technical document for security analysts
1. Introduction¶
mcp-scan integrates CodeQL as a secondary analysis engine to confirm findings and detect complex vulnerabilities that require advanced data flow analysis. CodeQL is especially useful for finding vulnerabilities that require following the flow through multiple functions and files.
2. Integration Architecture¶
2.1 Component Diagram¶
+------------------+
|     mcp-scan     |
|  Pattern/Taint   |  <-- Primary detection
+------------------+
         |
         v
+------------------+
|  CodeQL Client   |  <-- internal/codeql/client.go
+------------------+
         |
         v
+------------------+
|    CodeQL CLI    |  <-- External binary
+------------------+
         |
         +---> database create  (AST extraction)
         +---> database analyze (query execution)
         |
         v
+------------------+
|  SARIF Results   |
+------------------+
         |
         v
+------------------+
|  Finding Merger  |  <-- Combines with mcp-scan results
+------------------+
2.2 Code Location¶
Main file: internal/codeql/client.go
// Client holds the settings needed to run the external CodeQL CLI.
type Client struct {
	// binaryPath is the path to the codeql binary.
	binaryPath string
	// timeout is the analysis timeout.
	timeout time.Duration
	// queriesDir is the custom queries directory.
	queriesDir string
	// cacheEnabled controls whether databases are cached.
	cacheEnabled bool
}
3. Requirements¶
3.1 CodeQL Installation¶
macOS¶
# Via Homebrew
brew install codeql
# Or direct download
wget https://github.com/github/codeql-cli-binaries/releases/latest/download/codeql-osx64.zip
unzip codeql-osx64.zip
export PATH=$PATH:$(pwd)/codeql
Linux¶
wget https://github.com/github/codeql-cli-binaries/releases/latest/download/codeql-linux64.zip
unzip codeql-linux64.zip
export PATH=$PATH:$(pwd)/codeql
Windows¶
3.2 Installation Verification¶
Run `codeql version`. It should show version >= 2.14.0.
3.3 Required Query Packs¶
# Download standard packs
codeql pack download codeql/python-queries
codeql pack download codeql/javascript-queries
codeql pack download codeql/go-queries
4. CodeQL Client¶
4.1 Configuration¶
// Config carries the user-facing settings for the CodeQL client.
type Config struct {
	// BinaryPath is the path to codeql; empty means search in PATH.
	BinaryPath string
	// Timeout is the analysis timeout (default: 30 min).
	Timeout time.Duration
	// QueriesDir points at custom queries.
	QueriesDir string
	// Cache enables caching of databases (default: true).
	Cache bool
}

// DefaultConfig returns a Config with a 30-minute timeout and caching
// enabled. BinaryPath and QueriesDir are left empty.
func DefaultConfig() Config {
	var cfg Config
	cfg.Timeout = 30 * time.Minute
	cfg.Cache = true
	return cfg
}
4.2 Initialization¶
// NewClient builds a Client from cfg.
//
// When cfg.BinaryPath is empty the codeql binary is looked up in PATH. The
// binary is then invoked once with "version" to verify that it runs. A
// non-positive cfg.Timeout is replaced by the default from DefaultConfig.
//
// It returns an error when codeql cannot be found in PATH or when the
// version check fails.
func NewClient(cfg Config) (*Client, error) {
	binaryPath := cfg.BinaryPath
	if binaryPath == "" {
		// No explicit path configured: search in PATH.
		path, err := exec.LookPath("codeql")
		if err != nil {
			return nil, fmt.Errorf("codeql not found in PATH: %w", err)
		}
		binaryPath = path
	}

	// Verify the binary actually runs before handing out a client.
	if err := exec.Command(binaryPath, "version").Run(); err != nil {
		return nil, fmt.Errorf("codeql version check failed: %w", err)
	}

	// The client passes this value to context.WithTimeout; a zero or
	// negative duration would make those contexts expire immediately, so a
	// Config built without DefaultConfig still gets a usable timeout.
	timeout := cfg.Timeout
	if timeout <= 0 {
		timeout = DefaultConfig().Timeout
	}

	return &Client{
		binaryPath:   binaryPath,
		timeout:      timeout,
		queriesDir:   cfg.QueriesDir,
		cacheEnabled: cfg.Cache,
	}, nil
}
4.3 Availability Check¶
5. Executed Commands¶
5.1 Get Version¶
// Version runs "codeql version --format=json" and returns the "version"
// field of the JSON output. If the output cannot be decoded as JSON, the
// whitespace-trimmed raw output is returned instead.
func (c *Client) Version(ctx context.Context) (string, error) {
	raw, err := exec.CommandContext(ctx, c.binaryPath, "version", "--format=json").Output()
	if err != nil {
		return "", fmt.Errorf("codeql version failed: %w", err)
	}

	var parsed struct {
		Version string `json:"version"`
	}
	if json.Unmarshal(raw, &parsed) == nil {
		return parsed.Version, nil
	}

	// Not JSON: hand back the plain text the CLI printed.
	return strings.TrimSpace(string(raw)), nil
}
Executed command: `codeql version --format=json`
5.2 Create Database¶
// CreateDatabase runs "codeql database create" to build a database at dbPath
// from the sources under sourcePath, for the given language. An existing
// database at dbPath is overwritten (--overwrite).
//
// Both paths are converted to absolute paths before the command is started:
// the child process runs with its working directory set to the source tree,
// so a relative --source-root or database path would otherwise be resolved
// against that directory instead of the caller's working directory.
//
// The run is bounded by the client's timeout. On failure the returned error
// wraps the underlying error and includes the command's stderr.
func (c *Client) CreateDatabase(ctx context.Context, sourcePath, dbPath, language string) error {
	absSource, err := filepath.Abs(sourcePath)
	if err != nil {
		return fmt.Errorf("database create failed: resolving source path %q: %w", sourcePath, err)
	}
	absDB, err := filepath.Abs(dbPath)
	if err != nil {
		return fmt.Errorf("database create failed: resolving database path %q: %w", dbPath, err)
	}

	ctx, cancel := context.WithTimeout(ctx, c.timeout)
	defer cancel()

	args := []string{
		"database", "create",
		absDB,
		"--language=" + language,
		"--source-root=" + absSource,
		"--overwrite",
	}

	cmd := exec.CommandContext(ctx, c.binaryPath, args...)
	cmd.Dir = absSource

	// Capture stderr so a failure can report what the CLI printed.
	var stderr bytes.Buffer
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return fmt.Errorf("database create failed: %w, stderr: %s", err, stderr.String())
	}
	return nil
}
Executed command:
codeql database create /tmp/mcp-scan-codeql-xxx/db \
--language=python \
--source-root=/path/to/code \
--overwrite
Supported languages:
| Language | Value |
|----------|-------|
| Python | python |
| JavaScript | javascript |
| TypeScript | javascript (same extractor) |
| Go | go |
5.3 Analyze Database¶
// AnalyzeDatabase runs "codeql database analyze" against dbPath and writes
// SARIF v2.1.0 results, with snippets, to outputPath. When no queries are
// given, the client's default queries are used. The run is bounded by the
// client's timeout; on failure the error includes the command's stderr.
func (c *Client) AnalyzeDatabase(ctx context.Context, dbPath, outputPath string, queries ...string) error {
	runCtx, stop := context.WithTimeout(ctx, c.timeout)
	defer stop()

	selected := queries
	if len(selected) == 0 {
		selected = c.getDefaultQueries(dbPath)
	}

	// Fixed options first, then the query specifiers.
	argv := make([]string, 0, 7+len(selected))
	argv = append(argv,
		"database", "analyze",
		dbPath,
		"--format=sarifv2.1.0",
		"--output="+outputPath,
		"--sarif-add-snippets",
		"--threads=0", // Use all cores
	)
	argv = append(argv, selected...)

	var errOut bytes.Buffer
	cmd := exec.CommandContext(runCtx, c.binaryPath, argv...)
	cmd.Stderr = &errOut

	if runErr := cmd.Run(); runErr != nil {
		return fmt.Errorf("database analyze failed: %w, stderr: %s", runErr, errOut.String())
	}
	return nil
}
Executed command:
codeql database analyze /tmp/mcp-scan-codeql-xxx/db \
--format=sarifv2.1.0 \
--output=/tmp/mcp-scan-codeql-xxx/results.sarif \
--sarif-add-snippets \
--threads=0 \
codeql/python-queries:codeql-suites/python-security-extended.qls
5.4 Complete Scan (Combined)¶
// ScanDirectory performs a complete scan of sourcePath: it creates a CodeQL
// database in a fresh temporary directory, analyzes it with the given
// queries (or the defaults when none are given) and returns the parsed SARIF
// report.
//
// The temporary directory is removed when caching is disabled, and also
// whenever the scan fails: a partial database or missing results file is of
// no use and would otherwise be left behind. It is kept only after a
// successful scan with caching enabled.
func (c *Client) ScanDirectory(ctx context.Context, sourcePath, language string, queries ...string) (*SARIFReport, error) {
	// Create temporary directory
	tmpDir, err := os.MkdirTemp("", "mcp-scan-codeql-*")
	if err != nil {
		return nil, fmt.Errorf("failed to create temp dir: %w", err)
	}

	keep := false
	defer func() {
		if !keep {
			// Best-effort cleanup; a removal error must not mask the scan result.
			_ = os.RemoveAll(tmpDir)
		}
	}()

	dbPath := filepath.Join(tmpDir, "db")
	sarifPath := filepath.Join(tmpDir, "results.sarif")

	// 1. Create database
	if err := c.CreateDatabase(ctx, sourcePath, dbPath, language); err != nil {
		return nil, err
	}

	// 2. Analyze
	if err := c.AnalyzeDatabase(ctx, dbPath, sarifPath, queries...); err != nil {
		return nil, err
	}

	// 3. Parse results
	report, err := ParseSARIFFile(sarifPath)
	if err != nil {
		return nil, err
	}

	// Only a successful scan is worth keeping on disk.
	keep = c.cacheEnabled
	return report, nil
}
5.5 Run Individual Query¶
// RunQuery analyzes the existing database at dbPath with the single query
// (or suite) at queryPath and returns the parsed SARIF report. Results are
// written to a temporary directory that is removed before returning. The run
// is bounded by the client's timeout; on failure the error includes the
// command's stderr.
func (c *Client) RunQuery(ctx context.Context, dbPath, queryPath string) (*SARIFReport, error) {
	runCtx, stop := context.WithTimeout(ctx, c.timeout)
	defer stop()

	workDir, err := os.MkdirTemp("", "codeql-query-*")
	if err != nil {
		return nil, err
	}
	defer os.RemoveAll(workDir)

	resultsFile := filepath.Join(workDir, "results.sarif")

	cmd := exec.CommandContext(runCtx, c.binaryPath,
		"database", "analyze",
		dbPath,
		queryPath,
		"--format=sarifv2.1.0",
		"--output="+resultsFile,
		"--sarif-add-snippets",
	)

	var errOut bytes.Buffer
	cmd.Stderr = &errOut

	if runErr := cmd.Run(); runErr != nil {
		return nil, fmt.Errorf("query failed: %w, stderr: %s", runErr, errOut.String())
	}

	// Parse while the temporary directory still exists.
	return ParseSARIFFile(resultsFile)
}
6. Query Suites¶
6.1 Default Suites¶
// getDefaultQueries returns the query specifiers used when the caller does
// not pass any: the custom queries directory (if configured) followed by the
// standard security-extended suite for the language of the database at
// dbPath.
//
// Only the suite matching the database language is returned, because a
// database is built for one language. If the language cannot be determined
// or is not one of go, python or javascript, all three suites are returned.
func (c *Client) getDefaultQueries(dbPath string) []string {
	const (
		goSuite         = "codeql/go-queries:codeql-suites/go-security-extended.qls"
		pythonSuite     = "codeql/python-queries:codeql-suites/python-security-extended.qls"
		javascriptSuite = "codeql/javascript-queries:codeql-suites/javascript-security-extended.qls"
	)

	var queries []string

	// Add custom queries if they exist
	if c.queriesDir != "" {
		queries = append(queries, c.queriesDir)
	}

	// Add the standard security suite for the database language.
	switch primaryLanguageOf(dbPath) {
	case "go":
		return append(queries, goSuite)
	case "python":
		return append(queries, pythonSuite)
	case "javascript":
		return append(queries, javascriptSuite)
	}

	// Language unknown: fall back to every standard suite.
	return append(queries, goSuite, pythonSuite, javascriptSuite)
}

// primaryLanguageOf reads the top-level "primaryLanguage" entry from the
// codeql-database.yml file inside dbPath and returns its value, or "" when
// the file or the entry is missing.
//
// NOTE(review): relies on the CodeQL CLI writing codeql-database.yml with a
// top-level primaryLanguage key — confirm against the CLI version in use.
func primaryLanguageOf(dbPath string) string {
	data, err := os.ReadFile(filepath.Join(dbPath, "codeql-database.yml"))
	if err != nil {
		return ""
	}
	const key = "primaryLanguage:"
	for _, line := range strings.Split(string(data), "\n") {
		if !strings.HasPrefix(line, key) {
			continue
		}
		value := strings.TrimSpace(strings.TrimPrefix(line, key))
		// The value may or may not be quoted.
		return strings.Trim(value, `"'`)
	}
	return ""
}
6.2 Available Suites¶
| Language | Suite | Coverage |
|---|---|---|
| Python | python-security-extended.qls | Complete security |
| Python | python-security-and-quality.qls | Security + quality |
| JavaScript | javascript-security-extended.qls | Complete security |
| JavaScript | javascript-security-and-quality.qls | Security + quality |
| Go | go-security-extended.qls | Complete security |
| Go | go-security-and-quality.qls | Security + quality |
6.3 MCP-Relevant Queries¶
| Query | Language | Detects |
|---|---|---|
| py/command-line-injection | Python | RCE via subprocess |
| py/code-injection | Python | eval/exec with input |
| py/path-injection | Python | Path traversal |
| py/sql-injection | Python | SQLi |
| py/ssrf | Python | SSRF |
| py/clear-text-logging-sensitive-data | Python | Secret logging |
| js/command-line-injection | JS/TS | RCE via child_process |
| js/code-injection | JS/TS | eval with input |
| js/path-injection | JS/TS | Path traversal |
| js/sql-injection | JS/TS | SQLi |
| js/request-forgery | JS/TS | SSRF |
7. SARIF Results Parsing¶
7.1 SARIF Structure¶
type (
	// SARIFReport is the top-level SARIF document: schema, version and the
	// list of runs.
	SARIFReport struct {
		Schema  string `json:"$schema"`
		Version string `json:"version"`
		Runs    []Run  `json:"runs"`
	}

	// Run is one entry of the report's "runs" array: the tool description
	// and the results it produced.
	Run struct {
		Tool    Tool     `json:"tool"`
		Results []Result `json:"results"`
	}

	// Result is one entry of a run's "results" array: rule ID, level,
	// message and locations.
	Result struct {
		RuleID    string     `json:"ruleId"`
		Level     string     `json:"level"`
		Message   Message    `json:"message"`
		Locations []Location `json:"locations"`
	}
)
7.2 File Parsing¶
// ParseSARIFFile reads the file at path and decodes it as a SARIF report.
// It returns an error when the file cannot be read or is not valid JSON for
// a SARIFReport.
func ParseSARIFFile(path string) (*SARIFReport, error) {
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read SARIF: %w", readErr)
	}

	report := new(SARIFReport)
	if decodeErr := json.Unmarshal(raw, report); decodeErr != nil {
		return nil, fmt.Errorf("failed to parse SARIF: %w", decodeErr)
	}
	return report, nil
}
8. Finding Merging¶
8.1 Merge Strategy¶
CodeQL findings are merged with mcp-scan findings:
// MergeFindings combines mcp-scan findings with the results of a CodeQL
// SARIF report and returns the merged list.
//
// The result starts with a copy of mcpFindings. Every CodeQL result that is
// not a duplicate of a finding already in the list is appended and marked as
// CodeQL-confirmed. Finally, every finding for which the report holds a
// confirmation is marked as confirmed, and its confidence is raised from
// medium to high.
//
// A nil codeqlReport is treated as "no CodeQL results": the mcp-scan
// findings are returned unchanged.
func MergeFindings(mcpFindings []types.Finding, codeqlReport *codeql.SARIFReport) []types.Finding {
	// 1. Add mcp-scan findings
	merged := make([]types.Finding, 0, len(mcpFindings))
	merged = append(merged, mcpFindings...)

	// Without a report there is nothing to add or confirm.
	if codeqlReport == nil {
		return merged
	}

	// 2. Add CodeQL findings that are not duplicates
	for _, run := range codeqlReport.Runs {
		for _, result := range run.Results {
			finding := convertCodeQLResult(result)
			if isDuplicate(merged, finding) {
				continue
			}
			// Mark as confirmed by CodeQL
			finding.Evidence.CodeQLConfirmed = true
			merged = append(merged, finding)
		}
	}

	// 3. Mark mcp-scan findings confirmed by CodeQL
	for i := range merged {
		if !hasCodeQLConfirmation(codeqlReport, merged[i]) {
			continue
		}
		merged[i].Evidence.CodeQLConfirmed = true
		// Elevate confidence if it was medium
		if merged[i].Confidence == types.ConfidenceMedium {
			merged[i].Confidence = types.ConfidenceHigh
		}
	}

	return merged
}
8.2 CodeQL Result Conversion¶
// convertCodeQLResult turns one SARIF result into an mcp-scan finding.
//
// The rule ID is prefixed with "CODEQL-", the SARIF level is mapped to a
// severity and the rule ID to a vulnerability class. Confidence is always
// high and the finding is marked as CodeQL-confirmed. File, line range and
// snippet come from the first location of the result.
//
// A result without any location yields a finding with an empty Location and
// no snippet instead of panicking. When the region has no end line (SARIF
// allows omitting it for single-line regions), the start line is used.
func convertCodeQLResult(result codeql.Result) types.Finding {
	finding := types.Finding{
		RuleID: "CODEQL-" + result.RuleID,
		// Map severity level
		Severity:   mapCodeQLLevel(result.Level),
		Confidence: types.ConfidenceHigh, // CodeQL has high precision
		// Map rule ID to vulnerability class
		Class:       mapCodeQLRule(result.RuleID),
		Description: result.Message.Text,
		Evidence: types.Evidence{
			CodeQLConfirmed: true,
		},
	}

	// SARIF results are not required to carry a location.
	if len(result.Locations) == 0 {
		return finding
	}

	loc := result.Locations[0].PhysicalLocation

	// An absent endLine decodes to zero; treat it as a single-line region.
	endLine := loc.Region.EndLine
	if endLine == 0 {
		endLine = loc.Region.StartLine
	}

	finding.Location = types.Location{
		File:      loc.ArtifactLocation.URI,
		StartLine: loc.Region.StartLine,
		EndLine:   endLine,
	}
	finding.Evidence.Snippet = loc.Region.Snippet.Text
	return finding
}
// mapCodeQLLevel translates a SARIF result level into an mcp-scan severity:
// "error" is critical, "warning" is high, "note" is medium and anything else
// is informational.
func mapCodeQLLevel(level string) types.Severity {
	if level == "error" {
		return types.SeverityCritical
	}
	if level == "warning" {
		return types.SeverityHigh
	}
	if level == "note" {
		return types.SeverityMedium
	}
	return types.SeverityInfo
}
8.3 Duplicate Detection¶
// isDuplicate reports whether findings already contains an entry equivalent
// to candidate: same file, start lines at most two apart, and the same
// vulnerability class.
func isDuplicate(findings []types.Finding, candidate types.Finding) bool {
	for i := range findings {
		existing := &findings[i]

		// Must be in the same file.
		if existing.Location.File != candidate.Location.File {
			continue
		}
		// Must start within two lines of the candidate.
		if abs(existing.Location.StartLine-candidate.Location.StartLine) > 2 {
			continue
		}
		// Must be the same vulnerability class.
		if existing.Class == candidate.Class {
			return true
		}
	}
	return false
}
9. Configuration¶
9.1 In .mcp-scan.yaml¶
codeql:
  enabled: true
  binary_path: "" # Empty = search in PATH
  timeout: "30m"
  cache: true
  queries_dir: "./custom-queries" # Optional
  # Additional suites
  extra_suites:
    - "my-custom-suite.qls"
9.2 Environment Variables¶
# Alternative to config file
export MCP_SCAN_CODEQL_ENABLED=true
export MCP_SCAN_CODEQL_PATH=/path/to/codeql
export MCP_SCAN_CODEQL_TIMEOUT=30m
10. Supported Languages¶
// SupportedLanguages runs "codeql resolve languages" and returns the first
// whitespace-separated token of every output line, skipping blank lines and
// lines that start with "#".
func (c *Client) SupportedLanguages(ctx context.Context) ([]string, error) {
	out, err := exec.CommandContext(ctx, c.binaryPath, "resolve", "languages").Output()
	if err != nil {
		return nil, err
	}

	var langs []string
	for _, row := range strings.Split(string(out), "\n") {
		fields := strings.Fields(row)
		if len(fields) == 0 || strings.HasPrefix(fields[0], "#") {
			continue
		}
		// The language name is the first column of each line.
		langs = append(langs, fields[0])
	}
	return langs, nil
}
Executed command: `codeql resolve languages`
Typical output:
cpp (cpp-queries)
csharp (csharp-queries)
go (go-queries)
java (java-queries)
javascript (javascript-queries)
python (python-queries)
ruby (ruby-queries)
swift (swift-queries)
11. Advantages and Disadvantages¶
11.1 CodeQL Advantages¶
- High precision: Very few false positives
- Deep analysis: Follows flow between functions/files
- Proven queries: Maintained by GitHub Security Lab
- SARIF standard: Easy integration
- Extensible: Allows custom queries
11.2 Disadvantages¶
- Slow: Can take minutes
- Heavy: Requires creating complete database
- External dependency: Requires CodeQL CLI installed
- Limited languages: Not all languages supported
- Not MCP-aware: Doesn't understand MCP surface specifically
11.3 When to Use CodeQL¶
Recommended:
- Deep analysis for certification
- Confirmation of critical findings
- Large projects with multiple files
- When high confidence is needed

Not recommended:
- Fast CI/CD (use fast mode without CodeQL)
- Small single-file projects
- When the CodeQL CLI is not available
12. Troubleshooting¶
12.1 CodeQL Not Found¶
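Error: `codeql not found in PATH`
Solution: Install the CodeQL CLI (see section 3.1) and add it to `PATH`, or point mcp-scan at the binary with `binary_path` in `.mcp-scan.yaml` or the `MCP_SCAN_CODEQL_PATH` environment variable.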
12.2 DB Creation Fails¶
Error:
Possible causes: 1. Empty directory 2. Incorrect language 3. Files excluded by .gitignore
Solution:
# Verify files
find /path/to/code -name "*.py" | wc -l
# Specify correct language
codeql database create db --language=python --source-root=/path/to/code
12.3 Analysis Timeout¶
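Error: `database create failed: ...` or `database analyze failed: ...` once the configured timeout (default: 30 minutes) has elapsed.
Solution: Increase the timeout with `timeout` in `.mcp-scan.yaml` or the `MCP_SCAN_CODEQL_TIMEOUT` environment variable (for example `60m`).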
12.4 Queries Not Found¶
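Error: `database analyze failed: ...` because a query suite such as `codeql/python-queries:codeql-suites/python-security-extended.qls` cannot be resolved.
Solution: Download the required query packs (see section 3.3), for example `codeql pack download codeql/python-queries`.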
13. Complete Flow Example¶
13.1 Code to Analyze¶
# server.py
import subprocess
from mcp import Server

server = Server()

@server.tool()
def run_command(cmd: str):
    """Execute a shell command."""
    result = subprocess.run(cmd, shell=True, capture_output=True)  # intentionally vulnerable: tool input reaches a shell
    return result.stdout.decode()
13.2 Execution¶
13.3 Internal Process¶
1. mcp-scan detects (Pattern + Taint):
   - MCP-A001: Tool input flows to command execution
   - Confidence: High
2. CodeQL analyzes:
3. CodeQL finds:
   - py/command-line-injection
   - Level: error
4. Merge:
   - mcp-scan finding marked as CodeQLConfirmed: true
   - Confidence elevated if it was Medium
13.4 Final Output¶
{
"findings": [
{
"rule_id": "MCP-A001",
"severity": "critical",
"confidence": "high",
"location": {
"file": "server.py",
"line": 10
},
"description": "Tool input flows to command execution",
"evidence": {
"snippet": "subprocess.run(cmd, shell=True, ...)",
"codeql_confirmed": true
}
}
],
"codeql_version": "2.15.1",
"codeql_queries_run": ["python-security-extended.qls"]
}
Next document: vulnerability-classes.md