MLP Extrapolation Failure: Consolidation Settlement¶
Train on [0-2 years] → Predict [0-10 years]
Physics: $S(t) = S_{\text{final}}(1 - e^{-\alpha t})$
In [1]:
Copied!
# Core numerical, plotting, and deep-learning imports.
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from pathlib import Path

# Seed both RNGs so the noisy data and the network initialisation are reproducible.
torch.manual_seed(42)
np.random.seed(42)

# Notebook-wide plot defaults.
plt.rcParams.update({'figure.figsize': (14, 8), 'font.size': 11})

# Create figures directory for saved outputs.
Path('../figures').mkdir(parents=True, exist_ok=True)
# NOTE(review): this cell is an exact duplicate of the setup cell above — an
# artefact of the notebook export. Re-running it is harmless (idempotent).
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from pathlib import Path
# Re-seed so results do not depend on whether the previous cell already ran.
torch.manual_seed(42)
np.random.seed(42)
plt.rcParams['figure.figsize'] = (14, 8)
plt.rcParams['font.size'] = 11
# Create figures directory
Path('../figures').mkdir(parents=True, exist_ok=True)
Generate Data¶
In [2]:
Copied!
# Physics parameters for primary consolidation settlement.
S_final = 100.0  # ultimate (asymptotic) settlement, mm
alpha = 0.5      # consolidation rate constant, 1/year


def settlement(t, S_final=100.0, alpha=0.5):
    """Exponential consolidation curve S(t) = S_final * (1 - e^{-alpha*t}).

    Works elementwise on scalars or NumPy arrays of times (years);
    returns settlement in mm.
    """
    decay = np.exp(-alpha * t)
    return S_final * (1.0 - decay)
# Sparse, noisy observations covering only the first 2 years of settlement.
t_train = np.linspace(0, 2, 20)
S_train = settlement(t_train) + np.random.normal(0, 2, len(t_train))  # Gaussian noise, sigma = 2 mm
S_train = np.maximum(S_train, 0)  # physical constraint: settlement cannot be negative
# Dense evaluation grid over the full 10-year horizon (noise-free ground truth).
t_full = np.linspace(0, 10, 200)
S_physics = settlement(t_full)
# Column-vector float32 tensors (shape (N, 1)) for the PyTorch models.
t_train_t = torch.FloatTensor(t_train.reshape(-1, 1))
S_train_t = torch.FloatTensor(S_train.reshape(-1, 1))
t_full_t = torch.FloatTensor(t_full.reshape(-1, 1))
print(f"Training: {len(t_train)} points, t ∈ [0, {t_train.max():.0f}] years")
print(f"Testing: {len(t_full)} points, t ∈ [0, {t_full.max():.0f}] years")
# Plot training data against the true physics curve, shading the two regimes.
plt.figure(figsize=(10, 6))
plt.scatter(t_train, S_train, s=100, color='black', zorder=5, label='Training data')
plt.plot(t_full, S_physics, 'k--', linewidth=2, label='Physics', alpha=0.7)
plt.axvspan(0, 2, alpha=0.1, color='green', label='Training region')
plt.axvspan(2, 10, alpha=0.1, color='red', label='Extrapolation')
plt.xlabel('Time (years)')
plt.ylabel('Settlement (mm)')
plt.title('Training Data: 0-2 Years')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Physics parameters (duplicate export of the cell above).
S_final = 100.0  # ultimate settlement, mm
alpha = 0.5  # consolidation rate, 1/year
def settlement(t, S_final=100.0, alpha=0.5):
    """Return settlement S(t) = S_final * (1 - exp(-alpha * t)) in mm."""
    return S_final * (1.0 - np.exp(-alpha * t))
# NOTE(review): duplicate export of the data-generation cell above. Because it
# draws from np.random again, re-running it yields a different noise sample
# than the first copy unless the seed cell is re-run first.
t_train = np.linspace(0, 2, 20)
S_train = settlement(t_train) + np.random.normal(0, 2, len(t_train))  # sigma = 2 mm noise
S_train = np.maximum(S_train, 0)  # settlement cannot be negative
# Full range: 0-10 years
t_full = np.linspace(0, 10, 200)
S_physics = settlement(t_full)
# Convert to (N, 1) float32 tensors for PyTorch
t_train_t = torch.FloatTensor(t_train.reshape(-1, 1))
S_train_t = torch.FloatTensor(S_train.reshape(-1, 1))
t_full_t = torch.FloatTensor(t_full.reshape(-1, 1))
print(f"Training: {len(t_train)} points, t ∈ [0, {t_train.max():.0f}] years")
print(f"Testing: {len(t_full)} points, t ∈ [0, {t_full.max():.0f}] years")
# Plot training data
plt.figure(figsize=(10, 6))
plt.scatter(t_train, S_train, s=100, color='black', zorder=5, label='Training data')
plt.plot(t_full, S_physics, 'k--', linewidth=2, label='Physics', alpha=0.7)
plt.axvspan(0, 2, alpha=0.1, color='green', label='Training region')
plt.axvspan(2, 10, alpha=0.1, color='red', label='Extrapolation')
plt.xlabel('Time (years)')
plt.ylabel('Settlement (mm)')
plt.title('Training Data: 0-2 Years')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
Training: 20 points, t ∈ [0, 2] years Testing: 200 points, t ∈ [0, 10] years
MLP Architecture¶
In [ ]:
Copied!
class MLP(nn.Module):
    """Two-hidden-layer perceptron (1 -> 32 -> 32 -> 1) with a selectable activation.

    `activation` is one of 'relu', 'tanh', 'sigmoid'; the chosen module is
    stored on `self.act` and its name on `self.name`.
    """

    def __init__(self, activation='relu'):
        super().__init__()
        self.fc1 = nn.Linear(1, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 1)
        # He (Kaiming) init suits ReLU; Xavier suits the saturating activations.
        # Only the hidden-layer weights are re-initialised, as in the original.
        if activation == 'relu':
            for hidden in (self.fc1, self.fc2):
                nn.init.kaiming_normal_(hidden.weight, mode='fan_in', nonlinearity='relu')
        else:
            for hidden in (self.fc1, self.fc2):
                nn.init.xavier_normal_(hidden.weight)
        self.act = {
            'relu': nn.ReLU(),
            'tanh': nn.Tanh(),
            'sigmoid': nn.Sigmoid(),
        }[activation]
        self.name = activation

    def forward(self, t):
        """Map a (N, 1) time tensor to a (N, 1) settlement prediction."""
        hidden = self.act(self.fc1(t))
        hidden = self.act(self.fc2(hidden))
        return self.fc3(hidden)
def train_model(activation, epochs=3000):
    """Fit a fresh MLP to the 0-2 year training data.

    Trains with Adam (lr=0.01) on full-batch MSE for `epochs` steps, then
    returns `(model, pred)` where `pred` is the flattened prediction over
    the dense 0-10 year grid `t_full_t`.
    """
    model = MLP(activation)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    criterion = nn.MSELoss()
    for _ in range(epochs):
        optimizer.zero_grad()
        training_loss = criterion(model(t_train_t), S_train_t)
        training_loss.backward()
        optimizer.step()
    # Inference pass over the full horizon with gradients disabled.
    model.eval()
    with torch.no_grad():
        full_curve = model(t_full_t).numpy().flatten()
    return model, full_curve
# NOTE(review): duplicate export of the MLP definition above.
class MLP(nn.Module):
    """1 -> 32 -> 32 -> 1 perceptron with a selectable activation function."""
    def __init__(self, activation='relu'):
        super().__init__()
        self.fc1 = nn.Linear(1, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 1)
        # Better initialization for ReLU (He init); Xavier otherwise.
        if activation == 'relu':
            nn.init.kaiming_normal_(self.fc1.weight, mode='fan_in', nonlinearity='relu')
            nn.init.kaiming_normal_(self.fc2.weight, mode='fan_in', nonlinearity='relu')
        else:
            nn.init.xavier_normal_(self.fc1.weight)
            nn.init.xavier_normal_(self.fc2.weight)
        activations = {
            'relu': nn.ReLU(),
            'tanh': nn.Tanh(),
            'sigmoid': nn.Sigmoid(),
        }
        self.act = activations[activation]
        self.name = activation
    def forward(self, t):
        """Map a (N, 1) time tensor to a (N, 1) settlement prediction."""
        x = self.act(self.fc1(t))
        x = self.act(self.fc2(x))
        return self.fc3(x)
# NOTE(review): duplicate export of train_model above.
def train_model(activation, epochs=3000):
    """Train a fresh MLP on (t_train_t, S_train_t); return (model, prediction over t_full_t)."""
    model = MLP(activation)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    criterion = nn.MSELoss()
    # Full-batch gradient descent on MSE loss.
    for epoch in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(t_train_t), S_train_t)
        loss.backward()
        optimizer.step()
    # Predict over the dense 0-10 year grid without tracking gradients.
    model.eval()
    with torch.no_grad():
        pred = model(t_full_t).numpy().flatten()
    return model, pred
Train All Models¶
In [ ]:
Copied!
# Train one model per activation, caching the model and its 0-10 yr prediction.
activations = ['relu', 'tanh', 'sigmoid']
models = {}
predictions = {}
for name in activations:
    print(f"Training {name.upper()}...", end=' ')
    models[name], predictions[name] = train_model(name, epochs=3000)
    print("Done")
print(f"\nAll {len(activations)} models trained")
# NOTE(review): duplicate export of the training-driver cell above.
activations = ['relu', 'tanh', 'sigmoid']
models = {}
predictions = {}
# Train one model per activation and store both model and prediction curve.
for act in activations:
    print(f"Training {act.upper()}...", end=' ')
    model, pred = train_model(act, epochs=3000)
    models[act] = model
    predictions[act] = pred
    print("Done")
print(f"\nAll {len(activations)} models trained")
Results: Training vs Extrapolation¶
In [5]:
Copied!
# Boolean masks splitting the dense grid into interpolation vs extrapolation.
train_mask = t_full <= 2.0
extrap_mask = t_full > 2.0
print(f"{'Activation':<10} {'Train RMSE':<12} {'Extrap RMSE':<12} {'Max S':<10} {'Exceeds?'}")
print("-" * 60)
for act in activations:
    pred = predictions[act]
    # RMSE is measured against the noise-free physics curve in each region.
    rmse_train = np.sqrt(np.mean((pred[train_mask] - S_physics[train_mask])**2))
    rmse_extrap = np.sqrt(np.mean((pred[extrap_mask] - S_physics[extrap_mask])**2))
    max_s = pred.max()
    # Flag predictions that overshoot the physical ceiling by more than 5 mm.
    exceeds = "YES" if max_s > S_final + 5 else "NO"
    print(f"{act.upper():<10} {rmse_train:>10.2f} mm {rmse_extrap:>10.2f} mm {max_s:>8.1f} mm {exceeds:>7}")
# Reference row: the physics curve against itself (zero error by construction).
print(f"{'PHYSICS':<10} {'0.00 mm':>12} {'0.00 mm':>12} {S_physics[-1]:>8.1f} mm {'NO':>7}")
# NOTE(review): duplicate export of the error-table cell above.
train_mask = t_full <= 2.0
extrap_mask = t_full > 2.0
print(f"{'Activation':<10} {'Train RMSE':<12} {'Extrap RMSE':<12} {'Max S':<10} {'Exceeds?'}")
print("-" * 60)
for act in activations:
    pred = predictions[act]
    # RMSE against the noise-free physics curve per region.
    rmse_train = np.sqrt(np.mean((pred[train_mask] - S_physics[train_mask])**2))
    rmse_extrap = np.sqrt(np.mean((pred[extrap_mask] - S_physics[extrap_mask])**2))
    max_s = pred.max()
    exceeds = "YES" if max_s > S_final + 5 else "NO"
    print(f"{act.upper():<10} {rmse_train:>10.2f} mm {rmse_extrap:>10.2f} mm {max_s:>8.1f} mm {exceeds:>7}")
print(f"{'PHYSICS':<10} {'0.00 mm':>12} {'0.00 mm':>12} {S_physics[-1]:>8.1f} mm {'NO':>7}")
Activation Train RMSE Extrap RMSE Max S Exceeds? ------------------------------------------------------------ RELU 2.99 mm 104.34 mm 291.0 mm YES TANH 1.59 mm 31.95 mm 60.4 mm NO SIGMOID 1.47 mm 31.72 mm 60.7 mm NO PHYSICS 0.00 mm 0.00 mm 99.3 mm NO
Interpolation: Training Region [0-2 years]¶
In [6]:
Copied!
# Three side-by-side panels: each model's fit within the training window only.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
# One distinct colour per activation; reused by the extrapolation plot below.
colors = {
    'relu': '#E63946',
    'tanh': '#457B9D',
    'sigmoid': '#2A9D8F'
}
for idx, act in enumerate(activations):
    ax = axes[idx]
    # Only plot training region
    t_train_region = t_full[train_mask]
    S_train_region = S_physics[train_mask]
    pred_train_region = predictions[act][train_mask]
    ax.scatter(t_train, S_train, s=80, color='black', zorder=5, alpha=0.6, label='Data')
    ax.plot(t_train_region, S_train_region, 'k--', linewidth=3, label='Physics', alpha=0.7)
    ax.plot(t_train_region, pred_train_region, color=colors[act], linewidth=3,
            label=f'{act.upper()}', alpha=0.9)
    # RMSE here is against the noise-free physics curve, not the noisy data.
    rmse_train = np.sqrt(np.mean((pred_train_region - S_train_region)**2))
    ax.set_title(f'{act.upper()}: RMSE={rmse_train:.1f} mm',
                 fontsize=13, fontweight='bold')
    ax.set_xlabel('Time (years)', fontsize=11)
    ax.set_ylabel('Settlement (mm)', fontsize=11)
    ax.legend(loc='lower right', fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(-0.1, 2.1)
    ax.set_ylim(0, S_train_region.max()*1.1)
plt.suptitle('Interpolation: Training Region [0-2 years]',
             fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('../figures/interpolation.png', dpi=300, bbox_inches='tight')
plt.show()
# NOTE(review): duplicate export of the interpolation-plot cell above.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
colors = {
    'relu': '#E63946',
    'tanh': '#457B9D',
    'sigmoid': '#2A9D8F'
}
for idx, act in enumerate(activations):
    ax = axes[idx]
    # Only plot training region
    t_train_region = t_full[train_mask]
    S_train_region = S_physics[train_mask]
    pred_train_region = predictions[act][train_mask]
    ax.scatter(t_train, S_train, s=80, color='black', zorder=5, alpha=0.6, label='Data')
    ax.plot(t_train_region, S_train_region, 'k--', linewidth=3, label='Physics', alpha=0.7)
    ax.plot(t_train_region, pred_train_region, color=colors[act], linewidth=3,
            label=f'{act.upper()}', alpha=0.9)
    # RMSE against the noise-free physics curve in the training window.
    rmse_train = np.sqrt(np.mean((pred_train_region - S_train_region)**2))
    ax.set_title(f'{act.upper()}: RMSE={rmse_train:.1f} mm',
                 fontsize=13, fontweight='bold')
    ax.set_xlabel('Time (years)', fontsize=11)
    ax.set_ylabel('Settlement (mm)', fontsize=11)
    ax.legend(loc='lower right', fontsize=10)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(-0.1, 2.1)
    ax.set_ylim(0, S_train_region.max()*1.1)
plt.suptitle('Interpolation: Training Region [0-2 years]',
             fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('../figures/interpolation.png', dpi=300, bbox_inches='tight')
plt.show()
Extrapolation: Full Range [0-10 years]¶
In [7]:
Copied!
# --- Extrapolation over the full 0-10 year range, one panel per activation ---
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
for idx, act in enumerate(activations):
    ax = axes[idx]
    # Shade the training window (green) and the extrapolation region (red).
    ax.axvspan(0, 2, alpha=0.1, color='green', label='Training [0-2yr]')
    ax.axvspan(2, 10, alpha=0.1, color='red', label='Extrapolation [2-10yr]')
    # Band above S_final marks settlement beyond the physical asymptote.
    ax.axhspan(S_final, S_final*1.3, alpha=0.1, color='red')
    ax.scatter(t_train, S_train, s=80, color='black', zorder=5, alpha=0.6, label='Data')
    ax.plot(t_full, S_physics, 'k--', linewidth=3, label='Physics', alpha=0.7)
    ax.plot(t_full, predictions[act], color=colors[act], linewidth=3,
            label=f'{act.upper()}', alpha=0.9)
    ax.axhline(S_final, color='gray', linestyle=':', linewidth=2, alpha=0.5)  # physical asymptote
    ax.axvline(2.0, color='black', linestyle='-', linewidth=2, alpha=0.3)     # train/extrap boundary
    pred = predictions[act]
    # RMSE against the physics curve on the extrapolation region only.
    rmse_extrap = np.sqrt(np.mean((pred[extrap_mask] - S_physics[extrap_mask])**2))
    ax.set_title(f'{act.upper()}: Extrap RMSE={rmse_extrap:.1f} mm',
                 fontsize=13, fontweight='bold')
    ax.set_xlabel('Time (years)', fontsize=11)
    ax.set_ylabel('Settlement (mm)', fontsize=11)
    ax.legend(loc='lower right', fontsize=9)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(-0.2, 10.2)
    ax.set_ylim(-5, S_final*1.3)
plt.suptitle('Extrapolation: Full Range [0-10 years]',
             fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('../figures/extrapolation.png', dpi=300, bbox_inches='tight')
plt.show()

# --- Long-horizon check: each model's prediction at t = 20 years ---
print("\n20-Year Predictions:")
print(f"{'Activation':<10} {'S(20yr)':<12} {'Error'}")
print("-" * 35)
t_ext = np.linspace(0, 20, 400)
S_ext_physics = settlement(t_ext)  # ground truth out to 20 years; [-1] is S(20)
# (fix: removed t_ext_t — a tensor of the 0-20 yr grid was created but never used)
for act in activations:
    with torch.no_grad():
        pred_20 = models[act](torch.FloatTensor([[20.0]])).item()
    error = abs(pred_20 - S_ext_physics[-1])
    print(f"{act.upper():<10} {pred_20:>10.1f} mm {error:>8.1f} mm")
print(f"{'PHYSICS':<10} {S_ext_physics[-1]:>10.1f} mm {'0.0 mm':>8}")
# NOTE(review): duplicate export of the extrapolation-plot cell above.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
for idx, act in enumerate(activations):
    ax = axes[idx]
    # Full range [0-10 years]: shade training vs extrapolation regions.
    ax.axvspan(0, 2, alpha=0.1, color='green', label='Training [0-2yr]')
    ax.axvspan(2, 10, alpha=0.1, color='red', label='Extrapolation [2-10yr]')
    ax.axhspan(S_final, S_final*1.3, alpha=0.1, color='red')
    ax.scatter(t_train, S_train, s=80, color='black', zorder=5, alpha=0.6, label='Data')
    ax.plot(t_full, S_physics, 'k--', linewidth=3, label='Physics', alpha=0.7)
    ax.plot(t_full, predictions[act], color=colors[act], linewidth=3,
            label=f'{act.upper()}', alpha=0.9)
    ax.axhline(S_final, color='gray', linestyle=':', linewidth=2, alpha=0.5)
    ax.axvline(2.0, color='black', linestyle='-', linewidth=2, alpha=0.3)
    pred = predictions[act]
    rmse_extrap = np.sqrt(np.mean((pred[extrap_mask] - S_physics[extrap_mask])**2))
    ax.set_title(f'{act.upper()}: Extrap RMSE={rmse_extrap:.1f} mm',
                 fontsize=13, fontweight='bold')
    ax.set_xlabel('Time (years)', fontsize=11)
    ax.set_ylabel('Settlement (mm)', fontsize=11)
    ax.legend(loc='lower right', fontsize=9)
    ax.grid(True, alpha=0.3)
    ax.set_xlim(-0.2, 10.2)
    ax.set_ylim(-5, S_final*1.3)
plt.suptitle('Extrapolation: Full Range [0-10 years]',
             fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('../figures/extrapolation.png', dpi=300, bbox_inches='tight')
plt.show()
# 20-year predictions per model vs the physics value S(20).
print("\n20-Year Predictions:")
print(f"{'Activation':<10} {'S(20yr)':<12} {'Error'}")
print("-" * 35)
t_ext = np.linspace(0, 20, 400)
S_ext_physics = settlement(t_ext)
# NOTE(review): t_ext_t is never used below — candidate for removal.
t_ext_t = torch.FloatTensor(t_ext.reshape(-1, 1))
for act in activations:
    with torch.no_grad():
        pred_20 = models[act](torch.FloatTensor([[20.0]])).item()
    error = abs(pred_20 - S_ext_physics[-1])
    print(f"{act.upper():<10} {pred_20:>10.1f} mm {error:>8.1f} mm")
print(f"{'PHYSICS':<10} {S_ext_physics[-1]:>10.1f} mm {'0.0 mm':>8}")
20-Year Predictions: Activation S(20yr) Error ----------------------------------- RELU 573.5 mm 473.5 mm TANH 60.4 mm 39.6 mm SIGMOID 60.7 mm 39.3 mm PHYSICS 100.0 mm 0.0 mm
Summary¶
Key Findings:
- All models fit training data well (RMSE < 5mm)
- All models fail in extrapolation
- ReLU worst: linear extrapolation → unbounded growth
- Bounded activations (Tanh, Sigmoid) better but still wrong
Lesson: Training accuracy ≠ Extrapolation reliability