PINNs for 1D consolidation [L-BFGS]
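This notebook sets up a physics-informed neural network (PINN) for Terzaghi's one-dimensional consolidation problem and trains it with the L-BFGS optimizer, following the architecture and loss formulation of Zhang et al. (2024). The network approximates the excess pore pressure $u(z,t)$ and is trained to satisfy the governing equation together with a drained top boundary, an impermeable bottom boundary, and a uniform initial condition:

$$\frac{\partial u}{\partial t} = c_v \frac{\partial^2 u}{\partial z^2}, \qquad u(0,t) = 0, \qquad \left.\frac{\partial u}{\partial z}\right|_{z=H} = 0, \qquad u(z,0) = u_0.$$

Predictions are validated against the analytical series solution

$$u(z,t) = \sum_{m=0}^{\infty} \frac{2u_0}{M}\,\sin\!\left(\frac{Mz}{H}\right) e^{-M^2 T_v}, \qquad M = \frac{(2m+1)\pi}{2}, \quad T_v = \frac{c_v t}{H^2},$$

which is exactly what the `analytical_solution` function in the cell below evaluates.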
In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import grad
import time
import os
from tqdm import tqdm
# Set device (MPS for Apple Silicon, CUDA for NVIDIA, CPU as fallback)
def get_device():
if torch.backends.mps.is_available():
return torch.device("mps")
elif torch.cuda.is_available():
return torch.device("cuda")
else:
return torch.device("cpu")
device = get_device()
print(f"Using device: {device}")
torch.manual_seed(42)
class OptimizedPINN(nn.Module):
"""PINN based on Zhang et al. (2024) paper architecture"""
def __init__(self, layers=[2, 60, 60, 60, 60, 60, 1]):
super(OptimizedPINN, self).__init__()
self.layers = nn.ModuleList()
# Build 5 hidden layer network as per paper
for i in range(len(layers) - 1):
self.layers.append(nn.Linear(layers[i], layers[i+1]))
# Xavier initialization
self.init_weights()
def init_weights(self):
for layer in self.layers:
if isinstance(layer, nn.Linear):
nn.init.xavier_normal_(layer.weight)
nn.init.zeros_(layer.bias)
def forward(self, x):
# Input: [z, t] where z is normalized depth and t is time
for i, layer in enumerate(self.layers[:-1]):
x = torch.tanh(layer(x)) # Hyperbolic tangent as per paper
x = self.layers[-1](x) # Linear output
return x
def compute_derivatives(model, z, t):
"""Compute derivatives for PDE"""
zt = torch.cat([z, t], dim=1)
zt.requires_grad_(True)
u = model(zt)
# First derivatives
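# Summing u before calling grad yields per-point derivatives, since each output row depends only on its own input row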
grads = grad(u.sum(), zt, create_graph=True)[0]
u_z = grads[:, 0:1]
u_t = grads[:, 1:2]
# Second derivative w.r.t. z
u_zz = grad(u_z.sum(), zt, create_graph=True)[0][:, 0:1]
return u, u_t, u_z, u_zz
def physics_loss_lf(model, z_interior, t_interior, cv):
"""Physics loss Lf as per Zhang et al. paper"""
u, u_t, u_z, u_zz = compute_derivatives(model, z_interior, t_interior)
# PDE: ∂u/∂t = cv * ∂²u/∂z²
pde_residual = u_t - cv * u_zz
# Mean squared PDE residual over the nf collocation points; note that torch.mean
# already averages over nf, so the extra 1/nf factor only rescales the loss
# (the same applies to the Lk terms below)
nf = len(z_interior)
Lf = (1/nf) * torch.mean(pde_residual**2)
return Lf
def known_data_loss_lk(model, z_bc, t_bc, z_bot, t_bot, z_ic, t_ic, u0):
"""Known data loss Lk as per Zhang et al. paper"""
# Boundary condition: u(0,t) = 0 (top drainage)
zt_bc = torch.cat([z_bc, t_bc], dim=1)
u_bc = model(zt_bc)
# Bottom boundary: ∂u/∂z(H,t) = 0
u_bot, u_t_bot, u_z_bot, u_zz_bot = compute_derivatives(model, z_bot, t_bot)
# Initial condition: u(z,0) = u0
zt_ic = torch.cat([z_ic, t_ic], dim=1)
u_ic = model(zt_ic)
u0_tensor = torch.full_like(u_ic, u0)
# Compute individual loss components
nk_bc = len(z_bc)
nk_bot = len(z_bot)
nk_ic = len(z_ic)
bc_loss = (1/nk_bc) * torch.mean(u_bc**2)
bottom_loss = (1/nk_bot) * torch.mean(u_z_bot**2)
ic_loss = (1/nk_ic) * torch.mean((u_ic - u0_tensor)**2)
# Combined Lk loss
Lk = bc_loss + bottom_loss + ic_loss
return Lk, bc_loss, bottom_loss, ic_loss
def analytical_solution(z, t, H, cv, u0, n_terms=100):
"""High-precision analytical solution"""
u = np.zeros_like(z)
for n in range(n_terms):
m = 2*n + 1
term = (4*u0/np.pi) * (np.sin(m*np.pi*z/(2*H))/m) * np.exp(-(m**2)*(np.pi**2)*cv*t/(4*H**2))
u += term
if np.max(np.abs(term)) < 1e-12:
break
return u
def generate_collocation_points(H, T, N_interior, N_bc, N_ic, device):
"""Generate collocation points for training"""
# Interior points for physics loss
z_interior = torch.rand(N_interior, 1, device=device) * H
t_interior = torch.rand(N_interior, 1, device=device) * T
# Boundary points (z=0, t>0) for drainage condition
z_bc = torch.zeros(N_bc, 1, device=device)
t_bc = torch.rand(N_bc, 1, device=device) * T
# Bottom boundary points (z=H, t>0) for no-flow condition
z_bot = torch.ones(N_bc, 1, device=device) * H
t_bot = torch.rand(N_bc, 1, device=device) * T
# Initial condition points (t=0, 0≤z≤H)
z_ic = torch.rand(N_ic, 1, device=device) * H
t_ic = torch.zeros(N_ic, 1, device=device)
return z_interior, t_interior, z_bc, t_bc, z_bot, t_bot, z_ic, t_ic
def train_optimized_pinn(model, H, T, cv, u0, epochs=8000, save_path="pinn_consolidation_model.pth"):
"""Training with L-BFGS optimizer as per Zhang et al."""
# L-BFGS optimizer as used in the paper
optimizer = optim.LBFGS(model.parameters(), lr=0.1, max_iter=20,
tolerance_grad=1e-8, tolerance_change=1e-10)
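# Note: max_iter=20 means each optimizer.step() call runs up to 20 internal
# L-BFGS iterations, so each "epoch" below is one outer L-BFGS step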
# Collocation points
N_interior = 2000 # Interior physics points
N_bc = 200 # Boundary points
N_ic = 200 # Initial condition points
# Generate fixed collocation points (as per paper methodology)
z_interior, t_interior, z_bc, t_bc, z_bot, t_bot, z_ic, t_ic = generate_collocation_points(
H, T, N_interior, N_bc, N_ic, device)
losses = []
loss_components = {'total': [], 'Lf': [], 'Lk': [], 'bc': [], 'bottom': [], 'ic': []}
print("Starting optimized PINN training with L-BFGS...")
start_time = time.time()
# Create progress bar
pbar = tqdm(range(epochs), desc="Training PINN",
bar_format='{l_bar}{bar:30}{r_bar}{bar:-30b}',
ncols=200)
converged = False
# Training loop with progress bar
for epoch in pbar:
def closure():
optimizer.zero_grad()
# Compute loss components as per Zhang et al.
Lf = physics_loss_lf(model, z_interior, t_interior, cv)
Lk, bc_loss, bottom_loss, ic_loss = known_data_loss_lk(
model, z_bc, t_bc, z_bot, t_bot, z_ic, t_ic, u0)
# Total loss: L = wk*Lk + wf*Lf (with weights = 1.0 as per paper)
wk, wf = 1.0, 1.0
total_loss = wk * Lk + wf * Lf
total_loss.backward()
return total_loss
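# L-BFGS may call this closure several times per optimizer.step() during its
# line search, so all loss terms are recomputed on each evaluation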
# Compute losses for tracking
Lf = physics_loss_lf(model, z_interior, t_interior, cv)
Lk, bc_loss, bottom_loss, ic_loss = known_data_loss_lk(
model, z_bc, t_bc, z_bot, t_bot, z_ic, t_ic, u0)
total_loss = Lk + Lf
# Store loss components
losses.append(total_loss.item())
loss_components['total'].append(total_loss.item())
loss_components['Lf'].append(Lf.item())
loss_components['Lk'].append(Lk.item())
loss_components['bc'].append(bc_loss.item())
loss_components['bottom'].append(bottom_loss.item())
loss_components['ic'].append(ic_loss.item())
# L-BFGS step
optimizer.step(closure)
# Calculate max gradient for convergence check
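# (param.grad here holds the gradients from the last closure evaluation of this step)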
max_grad = 0
for param in model.parameters():
if param.grad is not None:
max_grad = max(max_grad, param.grad.abs().max().item())
# Update progress bar with current metrics
pbar.set_postfix({
'Total Loss': f'{total_loss.item():.2e}',
'Lf': f'{Lf.item():.2e}',
'Lk': f'{Lk.item():.2e}',
'Max Grad': f'{max_grad:.2e}'
})
# Detailed progress reporting every 500 epochs
if epoch % 500 == 0 and epoch > 0:
tqdm.write(f"\nEpoch {epoch} Detailed Report:")
tqdm.write(f" Total Loss = {total_loss.item():.8f}")
tqdm.write(f" Lf (Physics) = {Lf.item():.8f}, Lk (Known) = {Lk.item():.8f}")
tqdm.write(f" BC = {bc_loss.item():.8f}, Bottom = {bottom_loss.item():.8f}, IC = {ic_loss.item():.8f}")
tqdm.write(f" Max Gradient = {max_grad:.2e}")
# Early stopping if converged (max gradient < 1e-4 as per paper)
if max_grad < 1e-4:
tqdm.write(f"\n✅ Converged at epoch {epoch} with max gradient = {max_grad:.2e}")
converged = True
pbar.close()
break
if not converged:
pbar.close()
tqdm.write(f"\n⚠️ Training completed {epochs} epochs without convergence")
end_time = time.time()
tqdm.write(f"🏁 Training completed in {end_time - start_time:.2f} seconds")
# Save the trained model
model_info = {
'model_state_dict': model.state_dict(),
'losses': losses,
'loss_components': loss_components,
'parameters': {
'H': H,
'T': T,
'cv': cv,
'u0': u0
},
'training_info': {
'epochs_trained': epoch + 1,
'converged': converged,
'final_loss': total_loss.item(),
'training_time': end_time - start_time
}
}
torch.save(model_info, save_path)
tqdm.write(f"💾 Model saved to {save_path}")
return losses, loss_components
def load_trained_model(model_path="pinn_consolidation_model.pth"):
"""Load a previously trained model"""
if not os.path.exists(model_path):
return None, None, None
print(f"📁 Loading trained model from {model_path}")
# Load model info
model_info = torch.load(model_path, map_location=device)
# Extract parameters
params = model_info['parameters']
H, T, cv, u0 = params['H'], params['T'], params['cv'], params['u0']
# Initialize model with same architecture
model = OptimizedPINN([2, 60, 60, 60, 60, 60, 1]).to(device)
model.load_state_dict(model_info['model_state_dict'])
model.eval() # Set to evaluation mode
# Print model info
training_info = model_info['training_info']
print(f"✅ Model loaded successfully!")
print(f" - Training epochs: {training_info['epochs_trained']}")
print(f" - Converged: {'Yes' if training_info['converged'] else 'No'}")
print(f" - Final loss: {training_info['final_loss']:.2e}")
print(f" - Training time: {training_info['training_time']:.2f} seconds")
print(f" - Parameters: H={H}, T={T}, cv={cv}, u0={u0}")
return model, model_info, params
def create_plots(model, H, T, cv, u0):
# Time instants as in the paper (converted to years, assuming their 4h = our 1 year scale)
times_hours = [0.1, 0.5, 1.0, 2.0, 3.0, 4.0] # hours in their paper
times_years = [t/4.0 for t in times_hours] # convert to our year scale
# Depth points (normalized)
z_normalized = np.linspace(0, 1, 100) # x/h in paper notation
z_actual = z_normalized * H
# Colors for different time instants
colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown']
# Create the plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
print("Generating Zhang et al. style plots...")
# Progress bar for plot generation
pbar = tqdm(enumerate(zip(times_years, times_hours, colors)),
total=len(times_years), desc="Computing pressure profiles",
leave=False, ncols=100)
# Plot 1: Similar to Zhang et al. Figure 11a
for i, (t_year, t_hour, color) in pbar:
pbar.set_postfix({'time': f'{t_hour}h'})
# Analytical solution
u_analytical = analytical_solution(z_actual, t_year, H, cv, u0)
u_normalized_analytical = u_analytical / u0 # u/u0 normalization
# PINN prediction
with torch.no_grad():
z_tensor = torch.tensor(z_actual.reshape(-1, 1), dtype=torch.float32, device=device)
t_tensor = torch.tensor(np.full_like(z_actual, t_year).reshape(-1, 1), dtype=torch.float32, device=device)
zt = torch.cat([z_tensor, t_tensor], dim=1)
u_pinn = model(zt).cpu().numpy().flatten()
u_normalized_pinn = u_pinn / u0 # u/u0 normalization
# Plot with same style as paper
ax1.plot(u_normalized_analytical, z_normalized, '-', color=color, linewidth=2,
label=f't={t_hour}h (Analytical)', alpha=0.8)
ax1.plot(u_normalized_pinn, z_normalized, '--', color=color, linewidth=2,
label=f't={t_hour}h (PINN)', alpha=0.7)
pbar.close()
ax1.set_xlabel('u/u₀')
ax1.set_ylabel('z/H')
ax1.set_title('Excess Pore Pressure Profiles\n(Similar to Zhang et al. Fig 11a)')
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax1.grid(True, alpha=0.3)
ax1.invert_yaxis() # Depth increases downward
ax1.set_xlim(0, 1)
ax1.set_ylim(0, 1)
# Plot 2: Error analysis
print("Computing error analysis...")
pbar2 = tqdm(enumerate(zip(times_years, times_hours, colors)),
total=len(times_years), desc="Computing error profiles",
leave=False, ncols=100)
for i, (t_year, t_hour, color) in pbar2:
pbar2.set_postfix({'time': f'{t_hour}h'})
u_analytical = analytical_solution(z_actual, t_year, H, cv, u0)
with torch.no_grad():
z_tensor = torch.tensor(z_actual.reshape(-1, 1), dtype=torch.float32, device=device)
t_tensor = torch.tensor(np.full_like(z_actual, t_year).reshape(-1, 1), dtype=torch.float32, device=device)
zt = torch.cat([z_tensor, t_tensor], dim=1)
u_pinn = model(zt).cpu().numpy().flatten()
error = np.abs(u_pinn - u_analytical)
ax2.semilogy(error, z_normalized, '-', color=color, linewidth=2, label=f't={t_hour}h')
pbar2.close()
ax2.set_xlabel('Absolute Error (kPa)')
ax2.set_ylabel('z/H')
ax2.set_title('Absolute Error in Pore Pressure')
ax2.legend()
ax2.grid(True, alpha=0.3)
ax2.invert_yaxis()
plt.tight_layout()
plt.show()
return times_years, z_normalized
def comprehensive_analysis(model, H, T, cv, u0):
"""Comprehensive analysis and comparison"""
# Test at specific points as validation
test_times = [0.1, 0.25, 0.5, 0.75, 1.0]
test_depths = [0.0, 0.25, 0.5, 0.75, 1.0]
print(f"\nPoint-wise Comparison:")
print(f"{'Time':<8} {'Depth':<8} {'Analytical':<12} {'PINN':<12} {'Error':<12} {'Rel Error %':<12}")
print("-" * 75)
total_error = 0
n_points = 0
# Create progress bar for analysis
total_combinations = len(test_times) * len(test_depths)
pbar = tqdm(total=total_combinations, desc="Computing point-wise comparisons",
leave=False, ncols=100)
for t in test_times:
for z_norm in test_depths:
z_actual = z_norm * H
# Analytical
u_analytical = analytical_solution(np.array([z_actual]), t, H, cv, u0)[0]
# PINN
with torch.no_grad():
z_tensor = torch.tensor([[z_actual]], dtype=torch.float32, device=device)
t_tensor = torch.tensor([[t]], dtype=torch.float32, device=device)
zt = torch.cat([z_tensor, t_tensor], dim=1)
u_pinn = model(zt).cpu().numpy()[0, 0]
error = abs(u_pinn - u_analytical)
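# Note: at the drained boundary (z = 0) the analytical value is 0, so this
# percentage is dominated by the 1e-8 guard and is not meaningful there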
rel_error = error / (abs(u_analytical) + 1e-8) * 100
print(f"{t:<8.2f} {z_norm:<8.2f} {u_analytical:<12.4f} {u_pinn:<12.4f} {error:<12.4f} {rel_error:<12.2f}")
total_error += error
n_points += 1
pbar.update(1)
pbar.close()
print(f"\nOverall Statistics:")
print(f"Mean Absolute Error: {total_error/n_points:.6f} kPa")
print(f"Mean Relative Error: {(total_error/n_points)/u0*100:.4f}%")
def run_inference_analysis(model, H, T, cv, u0):
"""Run complete analysis using trained model"""
print(f"\n Running inference analysis with trained model...")
# Create plots and analysis
times_years, z_normalized = create_plots(model, H, T, cv, u0)
comprehensive_analysis(model, H, T, cv, u0)
return times_years, z_normalized
def show_training_plots(losses, loss_components):
"""Show training history plots"""
print("\n Creating training analysis plots...")
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
# Plot 1: Total loss evolution
ax1.semilogy(loss_components['total'])
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Total Loss')
ax1.set_title('Total Loss Evolution (L = Lk + Lf)')
ax1.grid(True)
# Plot 2: Loss components
ax2.semilogy(loss_components['Lf'], label='Lf (Physics)', alpha=0.8)
ax2.semilogy(loss_components['Lk'], label='Lk (Known Data)', alpha=0.8)
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss Components')
ax2.set_title('Physics vs Known Data Loss')
ax2.legend()
ax2.grid(True)
# Plot 3: Detailed loss breakdown
ax3.semilogy(loss_components['bc'], label='Boundary Condition', alpha=0.7)
ax3.semilogy(loss_components['bottom'], label='Bottom BC (∂u/∂z=0)', alpha=0.7)
ax3.semilogy(loss_components['ic'], label='Initial Condition', alpha=0.7)
ax3.set_xlabel('Epoch')
ax3.set_ylabel('Individual Loss Terms')
ax3.set_title('Detailed Loss Component Breakdown')
ax3.legend()
ax3.grid(True)
# Plot 4: Final validation - degree of consolidation
print("Computing degree of consolidation curves...")
t_eval = np.linspace(0.01, T, 50)
U_analytical = []
U_pinn = []
# Progress bar for degree of consolidation calculation
pbar_u = tqdm(t_eval, desc="Computing U vs time", leave=False, ncols=100)
for t in pbar_u:
# Analytical degree of consolidation
z_full = np.linspace(0, H, 100)
u_full_analytical = analytical_solution(z_full, t, H, cv, u0)
u_avg_analytical = np.trapezoid(u_full_analytical, z_full) / H
U_analytical.append(1 - u_avg_analytical / u0)
# PINN degree of consolidation
with torch.no_grad():
z_tensor = torch.tensor(z_full.reshape(-1, 1), dtype=torch.float32, device=device)
t_tensor = torch.tensor(np.full_like(z_full, t).reshape(-1, 1), dtype=torch.float32, device=device)
zt = torch.cat([z_tensor, t_tensor], dim=1)
u_full_pinn = model(zt).cpu().numpy().flatten()
u_avg_pinn = np.trapezoid(u_full_pinn, z_full) / H
U_pinn.append(1 - u_avg_pinn / u0)
pbar_u.set_postfix({'t': f'{t:.3f}', 'U_analytical': f'{U_analytical[-1]:.3f}', 'U_pinn': f'{U_pinn[-1]:.3f}'})
pbar_u.close()
ax4.plot(t_eval, U_analytical, 'r-', linewidth=3, label='Analytical', alpha=0.8)
ax4.plot(t_eval, U_pinn, 'b--', linewidth=2, label='PINN (Zhang et al. method)')
ax4.set_xlabel('Time (years)')
ax4.set_ylabel('Degree of Consolidation, U')
ax4.set_title('Degree of Consolidation Comparison')
ax4.legend()
ax4.grid(True)
ax4.set_ylim(0, 1)
plt.tight_layout()
plt.show()
# Problem parameters (matching the scale from Zhang et al.)
H = 1.0 # Layer thickness (normalized)
T = 1.0 # Total time (years, equivalent to 4 hours in their scale)
cv = 1.0 # Coefficient of consolidation
u0 = 100.0 # Initial excess pore pressure
MODEL_PATH = "pinn_consolidation_model.pth"
print(f" Layer thickness (H): {H} m")
print(f" Coefficient of consolidation (cv): {cv} m²/year")
print(f" Initial excess pore pressure (u₀): {u0} kPa")
print(f" Simulation time: {T} years")
# Configuration - Set RETRAIN = True to force retraining
RETRAIN = False # Change this to True if you want to retrain
if not RETRAIN:
# Try to load existing model first
model, model_info, loaded_params = load_trained_model(MODEL_PATH)
else:
model = None
print(f" RETRAIN mode enabled - will train new model")
if model is not None and not RETRAIN:
# Model loaded successfully - run inference only
print(f"\n Using loaded model for inference and analysis")
losses = model_info['losses']
loss_components = model_info['loss_components']
# Update parameters from loaded model
H, T, cv, u0 = loaded_params['H'], loaded_params['T'], loaded_params['cv'], loaded_params['u0']
# Run inference analysis
times_years, z_normalized = run_inference_analysis(model, H, T, cv, u0)
# Show training history
show_training_plots(losses, loss_components)
else:
# No existing model or retrain requested, train a new one
if not RETRAIN:
print(f"\n No existing model found. Training new PINN model...")
# Initialize model with Zhang et al. architecture
model = OptimizedPINN([2, 60, 60, 60, 60, 60, 1]).to(device)
# Count parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"Model initialized with {total_params:,} parameters")
print(f"Using device: {device}")
# Training with Zhang et al. methodology
print(f"\n Starting training process...")
losses, loss_components = train_optimized_pinn(model, H, T, cv, u0, epochs=6000, save_path=MODEL_PATH)
# After training, run analysis
print(f"\n Training complete! Running analysis...")
times_years, z_normalized = run_inference_analysis(model, H, T, cv, u0)
show_training_plots(losses, loss_components)
Using device: cuda
 Layer thickness (H): 1.0 m
 Coefficient of consolidation (cv): 1.0 m²/year
 Initial excess pore pressure (u₀): 100.0 kPa
 Simulation time: 1.0 years

 No existing model found. Training new PINN model...
Model initialized with 14,881 parameters
Using device: cuda

 Starting training process...
Starting optimized PINN training with L-BFGS...
Training PINN: 8%|██▌ | 501/6000 [04:03<47:46, 1.92it/s, Total Loss=6.62e-04, Lf=5.64e-04, Lk=9.85e-05, Max Grad=4.62e-03]
Epoch 500 Detailed Report: Total Loss = 0.00066203 Lf (Physics) = 0.00056355, Lk (Known) = 0.00009848 BC = 0.00004680, Bottom = 0.00004517, IC = 0.00000651 Max Gradient = 4.62e-03
Training PINN: 17%|█████ | 1001/6000 [07:57<38:53, 2.14it/s, Total Loss=4.90e-04, Lf=3.97e-04, Lk=9.32e-05, Max Grad=3.62e-02]
Epoch 1000 Detailed Report: Total Loss = 0.00049022 Lf (Physics) = 0.00039699, Lk (Known) = 0.00009323 BC = 0.00004689, Bottom = 0.00004190, IC = 0.00000444 Max Gradient = 3.62e-02
Training PINN: 25%|███████▌ | 1501/6000 [11:53<39:12, 1.91it/s, Total Loss=4.89e-04, Lf=3.96e-04, Lk=9.35e-05, Max Grad=3.18e-02]
Epoch 1500 Detailed Report: Total Loss = 0.00048935 Lf (Physics) = 0.00039584, Lk (Known) = 0.00009351 BC = 0.00004676, Bottom = 0.00004224, IC = 0.00000451 Max Gradient = 3.18e-02
Training PINN: 33%|██████████ | 2001/6000 [15:48<29:31, 2.26it/s, Total Loss=4.37e-04, Lf=3.58e-04, Lk=7.90e-05, Max Grad=2.95e-02]
Epoch 2000 Detailed Report: Total Loss = 0.00043737 Lf (Physics) = 0.00035840, Lk (Known) = 0.00007897 BC = 0.00004070, Bottom = 0.00003505, IC = 0.00000322 Max Gradient = 2.95e-02
Training PINN: 42%|████████████▌ | 2501/6000 [19:44<30:11, 1.93it/s, Total Loss=4.37e-04, Lf=3.58e-04, Lk=7.86e-05, Max Grad=1.92e-02]
Epoch 2500 Detailed Report: Total Loss = 0.00043662 Lf (Physics) = 0.00035798, Lk (Known) = 0.00007865 BC = 0.00004071, Bottom = 0.00003486, IC = 0.00000308 Max Gradient = 1.92e-02
Training PINN: 50%|███████████████ | 3001/6000 [23:45<25:36, 1.95it/s, Total Loss=4.36e-04, Lf=3.58e-04, Lk=7.83e-05, Max Grad=1.03e-02]
Epoch 3000 Detailed Report: Total Loss = 0.00043601 Lf (Physics) = 0.00035769, Lk (Known) = 0.00007832 BC = 0.00004064, Bottom = 0.00003461, IC = 0.00000307 Max Gradient = 1.03e-02
Training PINN: 58%|█████████████████▌ | 3501/6000 [27:39<16:27, 2.53it/s, Total Loss=4.35e-04, Lf=3.57e-04, Lk=7.81e-05, Max Grad=1.10e-02]
Epoch 3500 Detailed Report: Total Loss = 0.00043548 Lf (Physics) = 0.00035736, Lk (Known) = 0.00007812 BC = 0.00004058, Bottom = 0.00003450, IC = 0.00000304 Max Gradient = 1.10e-02
Training PINN: 67%|████████████████████ | 4001/6000 [31:41<15:24, 2.16it/s, Total Loss=4.35e-04, Lf=3.57e-04, Lk=7.79e-05, Max Grad=6.58e-04]
Epoch 4000 Detailed Report: Total Loss = 0.00043498 Lf (Physics) = 0.00035713, Lk (Known) = 0.00007786 BC = 0.00004053, Bottom = 0.00003430, IC = 0.00000303 Max Gradient = 6.58e-04
Training PINN: 75%|██████████████████████▌ | 4501/6000 [35:32<11:28, 2.18it/s, Total Loss=4.35e-04, Lf=3.57e-04, Lk=7.77e-05, Max Grad=4.72e-03]
Epoch 4500 Detailed Report: Total Loss = 0.00043456 Lf (Physics) = 0.00035689, Lk (Known) = 0.00007767 BC = 0.00004048, Bottom = 0.00003418, IC = 0.00000301 Max Gradient = 4.72e-03
Training PINN: 83%|█████████████████████████ | 5001/6000 [39:23<06:44, 2.47it/s, Total Loss=4.34e-04, Lf=3.57e-04, Lk=7.75e-05, Max Grad=7.94e-03]
Epoch 5000 Detailed Report: Total Loss = 0.00043422 Lf (Physics) = 0.00035668, Lk (Known) = 0.00007754 BC = 0.00004040, Bottom = 0.00003412, IC = 0.00000302 Max Gradient = 7.94e-03
Training PINN: 92%|███████████████████████████▌ | 5501/6000 [43:21<03:07, 2.66it/s, Total Loss=4.34e-04, Lf=3.56e-04, Lk=7.74e-05, Max Grad=7.81e-04]
Epoch 5500 Detailed Report: Total Loss = 0.00043378 Lf (Physics) = 0.00035638, Lk (Known) = 0.00007739 BC = 0.00004034, Bottom = 0.00003403, IC = 0.00000302 Max Gradient = 7.81e-04
Training PINN: 100%|██████████████████████████████| 6000/6000 [47:10<00:00, 2.12it/s, Total Loss=4.34e-04, Lf=3.57e-04, Lk=7.71e-05, Max Grad=1.31e-02]
⚠️ Training completed 6000 epochs without convergence
🏁 Training completed in 2830.19 seconds
💾 Model saved to pinn_consolidation_model.pth

 Training complete! Running analysis...

 Running inference analysis with trained model...
Generating Zhang et al. style plots...
Computing error analysis...
Point-wise Comparison:
Time     Depth    Analytical   PINN         Error        Rel Error %
---------------------------------------------------------------------------
0.10     0.00     0.0000       -0.1473      0.1473       1473112106.32
0.10     0.25     42.3759      42.7677      0.3918       0.92
0.10     0.50     73.5651      74.1243      0.5591       0.76
0.10     0.75     90.1279      90.4945      0.3666       0.41
0.10     1.00     94.9305      95.1694      0.2389       0.25
0.25     0.00     0.0000       0.0838       0.0838       838384628.30
0.25     0.25     26.4461      26.6060      0.1599       0.60
0.25     0.50     48.7013      48.9257      0.2244       0.46
0.25     0.75     63.4161      63.7170      0.3009       0.47
0.25     1.00     68.5446      68.8684      0.3238       0.47
0.50     0.00     0.0000       -0.0426      0.0426       426206588.75
0.50     0.25     14.1899      14.2404      0.0505       0.36
0.50     0.50     26.2188      26.3569      0.1381       0.53
0.50     0.75     34.2557      34.4410      0.1853       0.54
0.50     1.00     37.0777      37.2754      0.1976       0.53
0.75     0.00     0.0000       -0.0157      0.0157       157423019.41
0.75     0.25     7.6571       7.6836       0.0265       0.35
0.75     0.50     14.1485      14.1936      0.0451       0.32
0.75     0.75     18.4859      18.5533      0.0673       0.36
0.75     1.00     20.0090      20.0875      0.0785       0.39
1.00     0.00     0.0000       0.0250       0.0250       249567031.86
1.00     0.25     4.1321       4.1493       0.0172       0.42
1.00     0.50     7.6351       7.6606       0.0254       0.33
1.00     0.75     9.9758       10.0262      0.0505       0.51
1.00     1.00     10.7977      10.8316      0.0339       0.31

Overall Statistics:
Mean Absolute Error: 0.151826 kPa
Mean Relative Error: 0.1518%

 Creating training analysis plots...
Computing degree of consolidation curves...
FINAL RESULTS SUMMARY
Final Total Loss: 0.00043388
Final Physics Loss (Lf): 0.00035674
Final Known Data Loss (Lk): 0.00007713
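As a quick sanity check on the trained network, it can be queried at any single depth/time pair and compared against the series solution; the input is a two-column tensor ordered as [z, t]. The snippet below is a minimal sketch, assuming the cell above has been run so that `model`, `analytical_solution`, `H`, `cv`, and `u0` are in scope; the query point itself is an arbitrary illustration.

z_query, t_query = 0.5 * H, 0.25  # hypothetical evaluation point (mid-depth, t = 0.25 years)
with torch.no_grad():
    zt_query = torch.tensor([[z_query, t_query]], dtype=torch.float32, device=device)
    u_query = model(zt_query).item()
u_exact = analytical_solution(np.array([z_query]), t_query, H, cv, u0)[0]
print(f"PINN:       u(z={z_query}, t={t_query}) ≈ {u_query:.3f} kPa")
print(f"Analytical: u(z={z_query}, t={t_query}) = {u_exact:.3f} kPa")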
In [4]:
def create_plots(model, H, T, cv, u0):
"""Create plot"""
# Time instants as in the paper (converted to years, assuming their 4h = our 1 year scale)
times_hours = [0.1, 0.5, 1.0, 2.0, 3.0, 4.0] # hours in their paper
times_years = [t/4.0 for t in times_hours] # convert to our year scale
# Depth points (normalized)
z_normalized = np.linspace(0, 1, 100) # x/h in paper notation
z_actual = z_normalized * H
# Colors for different time instants
colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown']
# Create the plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
# Plot 1: Similar to Zhang et al. Figure 11a
for i, (t_year, t_hour, color) in enumerate(zip(times_years, times_hours, colors)):
# Analytical solution
u_analytical = analytical_solution(z_actual, t_year, H, cv, u0)
u_normalized_analytical = u_analytical / u0 # u/u0 normalization
# PINN prediction
with torch.no_grad():
z_tensor = torch.tensor(z_actual.reshape(-1, 1), dtype=torch.float32, device=device)
t_tensor = torch.tensor(np.full_like(z_actual, t_year).reshape(-1, 1), dtype=torch.float32, device=device)
zt = torch.cat([z_tensor, t_tensor], dim=1)
u_pinn = model(zt).cpu().numpy().flatten()
u_normalized_pinn = u_pinn / u0 # u/u0 normalization
# Plot with same style as paper
ax1.plot(u_normalized_analytical, z_normalized, '-', color=color, linewidth=2,
label=f't={t_hour}h (Analytical)', alpha=0.8)
ax1.plot(u_normalized_pinn, z_normalized, '--', color=color, linewidth=2,
label=f't={t_hour}h (PINN)', alpha=0.7)
ax1.set_xlabel('u/u₀')
ax1.set_ylabel('z/H')
ax1.set_title('Excess Pore Pressure Profiles\n(Similar to Zhang et al. Fig 11a)')
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax1.grid(True, alpha=0.3)
ax1.set_xlim(0, 1)
ax1.set_ylim(0, 1)
ax1.invert_yaxis() # Depth increases downward
# Plot 2: Error analysis
for i, (t_year, t_hour, color) in enumerate(zip(times_years, times_hours, colors)):
u_analytical = analytical_solution(z_actual, t_year, H, cv, u0)
with torch.no_grad():
z_tensor = torch.tensor(z_actual.reshape(-1, 1), dtype=torch.float32, device=device)
t_tensor = torch.tensor(np.full_like(z_actual, t_year).reshape(-1, 1), dtype=torch.float32, device=device)
zt = torch.cat([z_tensor, t_tensor], dim=1)
u_pinn = model(zt).cpu().numpy().flatten()
error = np.abs(u_pinn - u_analytical)
ax2.semilogy(error, z_normalized, '-', color=color, linewidth=2, label=f't={t_hour}h')
ax2.set_xlabel('Absolute Error (kPa)')
ax2.set_ylabel('z/H')
ax2.set_title('Absolute Error in Pore Pressure')
ax2.legend()
ax2.grid(True, alpha=0.3)
ax2.invert_yaxis()
plt.tight_layout()
plt.show()
return times_years, z_normalized
# Problem parameters (matching the scale from Zhang et al.)
H = 1.0 # Layer thickness (normalized)
T = 1.0 # Total time (years, equivalent to 4 hours in their scale)
cv = 1.0 # Coefficient of consolidation
u0 = 100.0 # Initial excess pore pressure
MODEL_PATH = "pinn_consolidation_model.pth"
print(f" Layer thickness (H): {H} m")
print(f" Coefficient of consolidation (cv): {cv} m²/year")
print(f" Initial excess pore pressure (u₀): {u0} kPa")
print(f" Simulation time: {T} years")
# Load trained model
model, model_info, loaded_params = load_trained_model(MODEL_PATH)
# Create plots and analysis
times_years, z_normalized = create_plots(model, H, T, cv, u0)
 Layer thickness (H): 1.0 m
 Coefficient of consolidation (cv): 1.0 m²/year
 Initial excess pore pressure (u₀): 100.0 kPa
 Simulation time: 1.0 years
📁 Loading trained model from pinn_consolidation_model.pth
✅ Model loaded successfully!
   - Training epochs: 6000
   - Converged: No
   - Final loss: 4.34e-04
   - Training time: 2830.19 seconds
   - Parameters: H=1.0, T=1.0, cv=1.0, u0=100.0
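Beyond the visual comparison, the two boundary conditions can be checked directly with autograd on the loaded model. The sketch below is a minimal check, assuming `model` and `compute_derivatives` from the earlier cells are in scope; the ten test times are arbitrary. It reports the worst violation of u(0, t) = 0 at the drained top and of ∂u/∂z(H, t) = 0 at the impermeable bottom.

t_check = torch.linspace(0.05, T, 10, device=device).reshape(-1, 1)  # arbitrary test times
z_top = torch.zeros_like(t_check)        # drained boundary, u(0, t) should be ~0
z_bottom = torch.full_like(t_check, H)   # impermeable boundary, du/dz(H, t) should be ~0

u_top = model(torch.cat([z_top, t_check], dim=1))
_, _, u_z_bottom, _ = compute_derivatives(model, z_bottom, t_check)

print(f"max |u(0, t)|     = {u_top.abs().max().item():.4f} kPa")
print(f"max |du/dz(H, t)| = {u_z_bottom.abs().max().item():.4f} kPa per unit depth")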
In [6]:
comprehensive_analysis(model, H, T, cv, u0)
Point-wise Comparison:
Time     Depth    Analytical   PINN         Error        Rel Error %
---------------------------------------------------------------------------
0.10     0.00     0.0000       -0.1473      0.1473       1473112106.32
0.10     0.25     42.3759      42.7677      0.3918       0.92
0.10     0.50     73.5651      74.1243      0.5591       0.76
0.10     0.75     90.1279      90.4945      0.3666       0.41
0.10     1.00     94.9305      95.1694      0.2389       0.25
0.25     0.00     0.0000       0.0838       0.0838       838384628.30
0.25     0.25     26.4461      26.6060      0.1599       0.60
0.25     0.50     48.7013      48.9257      0.2244       0.46
0.25     0.75     63.4161      63.7170      0.3009       0.47
0.25     1.00     68.5446      68.8684      0.3238       0.47
0.50     0.00     0.0000       -0.0426      0.0426       426206588.75
0.50     0.25     14.1899      14.2404      0.0505       0.36
0.50     0.50     26.2188      26.3569      0.1381       0.53
0.50     0.75     34.2557      34.4410      0.1853       0.54
0.50     1.00     37.0777      37.2754      0.1976       0.53
0.75     0.00     0.0000       -0.0157      0.0157       157423019.41
0.75     0.25     7.6571       7.6836       0.0265       0.35
0.75     0.50     14.1485      14.1936      0.0451       0.32
0.75     0.75     18.4859      18.5533      0.0673       0.36
0.75     1.00     20.0090      20.0875      0.0785       0.39
1.00     0.00     0.0000       0.0250       0.0250       249567031.86
1.00     0.25     4.1321       4.1493       0.0172       0.42
1.00     0.50     7.6351       7.6606       0.0254       0.33
1.00     0.75     9.9758       10.0262      0.0505       0.51
1.00     1.00     10.7977      10.8316      0.0339       0.31

Overall Statistics:
Mean Absolute Error: 0.151826 kPa
Mean Relative Error: 0.1518%
In [7]:
# Training loss visualization
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
# Total loss evolution
ax1.semilogy(loss_components['total'])
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Total Loss')
ax1.set_title('Total Loss Evolution (L = Lk + Lf)')
ax1.grid(True)
# Loss components
ax2.semilogy(loss_components['Lf'], label='Lf (Physics)', alpha=0.8)
ax2.semilogy(loss_components['Lk'], label='Lk (Known Data)', alpha=0.8)
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss Components')
ax2.set_title('Physics vs Known Data Loss')
ax2.legend()
ax2.grid(True)
# Detailed loss breakdown
ax3.semilogy(loss_components['bc'], label='Boundary Condition', alpha=0.7)
ax3.semilogy(loss_components['bottom'], label='Bottom BC (∂u/∂z=0)', alpha=0.7)
ax3.semilogy(loss_components['ic'], label='Initial Condition', alpha=0.7)
ax3.set_xlabel('Epoch')
ax3.set_ylabel('Individual Loss Terms')
ax3.set_title('Detailed Loss Component Breakdown')
ax3.legend()
ax3.grid(True)
# Final validation - degree of consolidation
t_eval = np.linspace(0.01, T, 50)
U_analytical = []
U_pinn = []
for t in t_eval:
# Analytical degree of consolidation
z_full = np.linspace(0, H, 100)
u_full_analytical = analytical_solution(z_full, t, H, cv, u0)
u_avg_analytical = np.trapezoid(u_full_analytical, z_full) / H
U_analytical.append(1 - u_avg_analytical / u0)
# PINN degree of consolidation
with torch.no_grad():
z_tensor = torch.tensor(z_full.reshape(-1, 1), dtype=torch.float32, device=device)
t_tensor = torch.tensor(np.full_like(z_full, t).reshape(-1, 1), dtype=torch.float32, device=device)
zt = torch.cat([z_tensor, t_tensor], dim=1)
u_full_pinn = model(zt).cpu().numpy().flatten()
u_avg_pinn = np.trapezoid(u_full_pinn, z_full) / H
U_pinn.append(1 - u_avg_pinn / u0)
ax4.plot(t_eval, U_analytical, 'r-', linewidth=3, label='Analytical', alpha=0.8)
ax4.plot(t_eval, U_pinn, 'b--', linewidth=2, label='PINN')
ax4.set_xlabel('Time (years)')
ax4.set_ylabel('Degree of Consolidation, U')
ax4.set_title('Degree of Consolidation Comparison')
ax4.legend()
ax4.grid(True)
ax4.set_ylim(0, 1)
plt.tight_layout()
plt.show()
# Final summary
final_loss = loss_components['total'][-1]
final_lf = loss_components['Lf'][-1]
final_lk = loss_components['Lk'][-1]
print(f"\n" + "="*60)
print("FINAL RESULTS SUMMARY")
print("="*60)
print(f"Final Total Loss: {final_loss:.8f}")
print(f"Final Physics Loss (Lf): {final_lf:.8f}")
print(f"Final Known Data Loss (Lk): {final_lk:.8f}")
============================================================
FINAL RESULTS SUMMARY
============================================================
Final Total Loss: 0.00043388
Final Physics Loss (Lf): 0.00035674
Final Known Data Loss (Lk): 0.00007713
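As a final cross-check, the time to 50% average consolidation can be read off both curves from the cell above and compared with Terzaghi's time factor Tv ≈ 0.197, i.e. t50 = 0.197·H²/cv for this singly drained layer. A minimal sketch, assuming `t_eval`, `U_analytical`, and `U_pinn` are still in scope:

# U increases monotonically with time, so np.interp can invert U(t) at U = 0.5
t50_analytical = np.interp(0.5, U_analytical, t_eval)
t50_pinn = np.interp(0.5, U_pinn, t_eval)
print(f"t50 from analytical curve: {t50_analytical:.4f} years")
print(f"t50 from PINN curve:       {t50_pinn:.4f} years")
print(f"t50 from Tv = 0.197:       {0.197 * H**2 / cv:.4f} years")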