Skip to content

Type Inference System

Overview

The type inference system (internal/typeinfo/) provides type information for variables and expressions to improve taint analysis precision. It helps reduce false positives by understanding when type conversions make certain attacks impossible.

Architecture

┌─────────────────────────────────────────────────────────────┐
│                    Type Inference System                      │
├─────────────────────────────────────────────────────────────┤
│                                                               │
│  ┌───────────────┐    ┌───────────────┐    ┌──────────────┐ │
│  │ TypeContext   │    │   Inferrer    │    │  TypeInfo    │ │
│  │ (Variable →   │◄───│  (Language-   │◄───│  (Type       │ │
│  │  TypeInfo)    │    │   specific)   │    │   metadata)  │ │
│  └───────────────┘    └───────────────┘    └──────────────┘ │
│         │                     │                              │
│         ▼                     ▼                              │
│  ┌───────────────┐    ┌───────────────┐                     │
│  │ FunctionType  │    │ InferrerFac-  │                     │
│  │ (params →     │    │   tory        │                     │
│  │  return)      │    │               │                     │
│  └───────────────┘    └───────────────┘                     │
│                                                               │
└─────────────────────────────────────────────────────────────┘

Key Types

TypeKind

Enumeration of supported types:

// Supported TypeKind values. Each constant's string value is its
// lower-case name.
const (
    // TypeString is string/text data.
    TypeString TypeKind = "string"
    // TypeInt is an integer number.
    TypeInt TypeKind = "int"
    // TypeFloat is a floating-point number.
    TypeFloat TypeKind = "float"
    // TypeBool is a boolean value.
    TypeBool TypeKind = "bool"
    // TypeList is an array or list.
    TypeList TypeKind = "list"
    // TypeDict is a dictionary or map.
    TypeDict TypeKind = "dict"
    // TypeObject is a class instance.
    TypeObject TypeKind = "object"
    // TypeCallable is a function or method.
    TypeCallable TypeKind = "callable"
    // TypeNone is None, null, or undefined.
    TypeNone TypeKind = "none"
    // TypeAny is a dynamic ("any") type.
    TypeAny TypeKind = "any"
    // TypeUnknown is a type that could not be determined.
    TypeUnknown TypeKind = "unknown"
)

TypeInfo

Complete type information:

// TypeInfo is the complete type description of a variable or expression.
type TypeInfo struct {
    // Kind is the base type.
    Kind TypeKind
    // ElementType is the element type; used for List.
    ElementType *TypeInfo
    // KeyType is the key type; used for Dict.
    KeyType *TypeInfo
    // ValueType is the value type; used for Dict.
    ValueType *TypeInfo
    // ClassName is the class name; used for Object.
    ClassName string
    // Generic holds the generic type parameters.
    Generic []*TypeInfo
    // Nullable reports whether the value can be null/None.
    Nullable bool
}

TypeContext

Tracks types in a scope:

// TypeContext records the types known within one scope.
type TypeContext struct {
    // Variables maps a variable name to its type.
    Variables map[string]*TypeInfo
    // Functions maps a function name to its signature.
    Functions map[string]*FunctionType
    // Classes maps a class name to its definition.
    Classes map[string]*ClassType
    // Parent is the enclosing context, used for nested scopes.
    Parent *TypeContext
}

Inference Rules

Python Type Inference

Literals

Expression Inferred Type
"hello" string
42 int
3.14 float
True bool
None none
[1, 2, 3] list[int]
{"a": 1} dict[string, int]

Built-in Functions

Function Return Type
int(x) int
str(x) string
float(x) float
bool(x) bool
list(x) list
dict(x) dict
len(x) int
input() string

Method Calls

Method Return Type
str.upper() string
str.lower() string
str.strip() string
list.append() none
list.pop() element type
dict.get() value type (nullable)
dict.keys() list[key type]

TypeScript/JavaScript Type Inference

Literals

Expression Inferred Type
"hello" string
42 int
3.14 float
true bool
null none
undefined none
[1, 2, 3] list[int]
{a: 1} object

Built-in Functions

Function Return Type
parseInt(x) int
parseFloat(x) float
String(x) string
Number(x) float
Boolean(x) bool
Array.isArray(x) bool

Node.js Patterns

Pattern Type
process.env.VAR string (nullable)
req.body any
req.query dict
req.params dict

Use in Taint Analysis

Type-Aware Sanitization

Type conversions can break taint for certain sink categories:

# Example: int() breaks command injection taint
user_input = request.args.get("id")  # tainted string
user_id = int(user_input)            # tainted int

# Command injection NOT possible with int
os.system(f"lookup {user_id}")       # Safe (false positive avoided)

# SQL injection is still possible when the raw string reaches the query
cursor.execute(f"SELECT * WHERE id={user_input}")  # Still dangerous

Implementation

// TypeInfo methods for taint analysis

// IsStringLike reports whether the type can carry string payloads.
//
// It returns true for TypeString, TypeAny and TypeUnknown. A nil receiver
// carries no type information, so it is handled like TypeUnknown and also
// reports true instead of panicking.
func (t *TypeInfo) IsStringLike() bool {
    if t == nil {
        // No information at all: take the conservative answer.
        return true
    }
    switch t.Kind {
    case TypeString, TypeAny, TypeUnknown:
        return true
    }
    return false
}

// IsNumeric reports whether the type is numeric (TypeInt or TypeFloat).
//
// A nil receiver is not numeric; it reports false instead of panicking.
func (t *TypeInfo) IsNumeric() bool {
    if t == nil {
        return false
    }
    switch t.Kind {
    case TypeInt, TypeFloat:
        return true
    }
    return false
}

// IsSafeForExec reports whether the type is safe for command execution.
//
// It returns true for TypeInt, TypeFloat, TypeBool and TypeNone. A nil
// receiver carries no type information, so it is never considered safe and
// reports false instead of panicking.
func (t *TypeInfo) IsSafeForExec() bool {
    if t == nil {
        // Unknown must not suppress a command-injection check.
        return false
    }
    switch t.Kind {
    case TypeInt, TypeFloat, TypeBool, TypeNone:
        return true
    }
    return false
}

// IsSafeForSQL reports whether the type is safe for SQL (without quotes).
//
// It returns true for TypeInt, TypeFloat and TypeBool. A nil receiver
// carries no type information, so it is never considered safe and reports
// false instead of panicking.
func (t *TypeInfo) IsSafeForSQL() bool {
    if t == nil {
        // Unknown must not suppress a SQL-injection check.
        return false
    }
    switch t.Kind {
    case TypeInt, TypeFloat, TypeBool:
        return true
    }
    return false
}

API Reference

Creating Inferrers

// Create language-specific inferrer
inferrer := typeinfo.InferrerFactory(types.Python)

// Build context from file
file := parser.ParseFile("example.py", types.Python)
ctx := inferrer.BuildContext(file)

Inferring Types

// Infer expression type
expr := &ast.Call{Function: "int", Arguments: []ast.Expression{...}}
typeInfo := inferrer.InferExpression(expr, ctx)

// Check type properties
if typeInfo.IsSafeForExec() {
    // Skip command injection check
}

Working with Context

// Get variable type
varType := ctx.GetVariable("user_input")

// Set variable type (after assignment)
ctx.SetVariable("parsed_id", &TypeInfo{Kind: TypeInt})

// Create child scope (for functions, blocks)
childCtx := ctx.NewChild()

Configuration

Type inference is enabled by default. To configure:

# mcp-scan.yaml
analysis:
  type_inference:
    enabled: true
    strict: false  # If true, unknown types treated as string-like

Limitations

  1. No Flow Analysis: Types are inferred per-expression, not across control flow
  2. No Generic Inference: Generic type parameters not fully resolved
  3. No External Types: Types from imported libraries not inferred
  4. Conservative Unknown: When type cannot be determined, assumes string-like

Examples

Python Example

from flask import request

def handler():
    user_input = request.args.get("q")  # Inferred: string
    page_num = int(request.args.get("page", "1"))  # Inferred: int

    # Taint analysis knows page_num is int, safe for command
    # but user_input is string, dangerous for command

TypeScript Example

import express from 'express';

function handler(req: express.Request) {
    const query = req.query.q;           // Inferred: string
    const page = parseInt(req.query.p);  // Inferred: int

    // Type inference helps taint analysis
}

Extension

Adding Type Rules

// In internal/typeinfo/python.go

// Add new builtin function type
pythonBuiltinTypes["my_sanitizer"] = &TypeInfo{Kind: TypeString}

// Add method type rule
func (i *PythonInferrer) inferMethodCall(receiver *TypeInfo, method string) *TypeInfo {
    // Stays nil unless one of the method rules below matches.
    var inferred *TypeInfo

    // One case per known method name; add new rules here.
    switch method {
    case "my_method":
        inferred = &TypeInfo{Kind: TypeInt}
    }

    return inferred
}

Custom Type Annotations

The system can be extended to read type annotations:

// Future: Parse Python type annotations
//
// parseAnnotation maps an annotation string to a freshly allocated TypeInfo.
// Only the exact strings "int", "str" and "List[int]" are recognised; every
// other input yields a TypeInfo whose Kind is TypeUnknown.
func parseAnnotation(annotation string) *TypeInfo {
    // Start from the fallback and refine it when the annotation is known.
    parsed := &TypeInfo{Kind: TypeUnknown}

    switch annotation {
    case "int":
        parsed.Kind = TypeInt
    case "str":
        parsed.Kind = TypeString
    case "List[int]":
        parsed.Kind = TypeList
        parsed.ElementType = &TypeInfo{Kind: TypeInt}
    }

    return parsed
}