Close Menu
    Facebook X (Twitter) Instagram
    • Privacy Policy
    • Terms Of Service
    • Social Media Disclaimer
    • DMCA Compliance
    • Anti-Spam Policy
    Facebook X (Twitter) Instagram
    Deep Tech Ledger
    • Home
    • Crypto News
      • Bitcoin
      • Ethereum
      • Altcoins
      • Blockchain
      • DeFi
    • AI News
    • Stock News
    • Learn
      • AI for Beginners
      • AI Tips
      • Make Money with AI
    • Reviews
    • Tools
      • Best AI Tools
      • Crypto Market Cap List
      • Stock Market Overview
      • Market Heatmap
    • Contact
    Deep Tech Ledger
    Home»AI News»How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels
    How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels
    AI News

    How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels

    March 17, 20263 Mins Read
    Share
    Facebook Twitter LinkedIn Pinterest Email
    synthesia


    angles = np.linspace(0.0, 2.0 * np.pi, n_particles, endpoint=False, dtype=np.float32)
    px0_np = 0.4 * np.cos(angles).astype(np.float32)
    py0_np = (0.7 + 0.15 * np.sin(angles)).astype(np.float32)
    vx0_np = (-0.8 * np.sin(angles)).astype(np.float32)
    vy0_np = (0.8 * np.cos(angles)).astype(np.float32)

    px0_wp = wp.array(px0_np, dtype=wp.float32, device=device)
    py0_wp = wp.array(py0_np, dtype=wp.float32, device=device)
    vx0_wp = wp.array(vx0_np, dtype=wp.float32, device=device)
    vy0_wp = wp.array(vy0_np, dtype=wp.float32, device=device)

    state_size = (steps + 1) * n_particles
    px_wp = wp.empty(state_size, dtype=wp.float32, device=device)
    py_wp = wp.empty(state_size, dtype=wp.float32, device=device)
    vx_wp = wp.empty(state_size, dtype=wp.float32, device=device)
    vy_wp = wp.empty(state_size, dtype=wp.float32, device=device)

    wp.launch(
    kernel=init_particles_kernel,
    dim=n_particles,
    inputs=[n_particles, px0_wp, py0_wp, vx0_wp, vy0_wp],
    outputs=[px_wp, py_wp, vx_wp, vy_wp],
    device=device,
    )

    synthesia

    wp.launch(
    kernel=simulate_particles_kernel,
    dim=steps * n_particles,
    inputs=[n_particles, dt, gravity, damping, bounce, radius],
    outputs=[px_wp, py_wp, vx_wp, vy_wp],
    device=device,
    )
    wp.synchronize()

    px_traj = px_wp.numpy().reshape(steps + 1, n_particles)
    py_traj = py_wp.numpy().reshape(steps + 1, n_particles)

    sample_ids = np.linspace(0, n_particles – 1, 16, dtype=int)
    plt.figure(figsize=(8, 6))
    for idx in sample_ids:
    plt.plot(px_traj[:, idx], py_traj[:, idx], linewidth=1.5)
    plt.axhline(radius, linestyle=”–“)
    plt.xlim(-1.05, 1.05)
    plt.ylim(0.0, 1.25)
    plt.title(f”Warp particle trajectories on {device}”)
    plt.xlabel(“x”)
    plt.ylabel(“y”)
    plt.show()

    proj_steps = 180
    proj_dt = np.float32(0.025)
    proj_g = np.float32(-9.8)
    target_x = np.float32(3.8)
    target_y = np.float32(0.0)

    vx_value = np.float32(2.0)
    vy_value = np.float32(6.5)
    lr = 0.08
    iters = 60

    loss_history = []
    vx_history = []
    vy_history = []

    for it in range(iters):
    init_vx_wp = wp.array(np.array([vx_value], dtype=np.float32), dtype=wp.float32, device=device, requires_grad=True)
    init_vy_wp = wp.array(np.array([vy_value], dtype=np.float32), dtype=wp.float32, device=device, requires_grad=True)

    x_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
    y_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
    vx_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
    vy_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
    loss_wp = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)

    tape = wp.Tape()
    with tape:
    wp.launch(
    kernel=init_projectile_kernel,
    dim=1,
    inputs=[],
    outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp, init_vx_wp, init_vy_wp],
    device=device,
    )
    wp.launch(
    kernel=projectile_step_kernel,
    dim=proj_steps,
    inputs=[proj_dt, proj_g],
    outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp],
    device=device,
    )
    wp.launch(
    kernel=projectile_loss_kernel,
    dim=1,
    inputs=[proj_steps, target_x, target_y],
    outputs=[x_hist_wp, y_hist_wp, loss_wp],
    device=device,
    )

    tape.backward(loss=loss_wp)
    wp.synchronize()

    current_loss = float(loss_wp.numpy()[0])
    grad_vx = float(init_vx_wp.grad.numpy()[0])
    grad_vy = float(init_vy_wp.grad.numpy()[0])

    vx_value = np.float32(vx_value – lr * grad_vx)
    vy_value = np.float32(vy_value – lr * grad_vy)

    loss_history.append(current_loss)
    vx_history.append(float(vx_value))
    vy_history.append(float(vy_value))

    if it % 10 == 0 or it == iters – 1:
    print(f”iter={it:02d} loss={current_loss:.6f} vx={vx_value:.4f} vy={vy_value:.4f} grad=({grad_vx:.4f}, {grad_vy:.4f})”)

    final_init_vx_wp = wp.array(np.array([vx_value], dtype=np.float32), dtype=wp.float32, device=device)
    final_init_vy_wp = wp.array(np.array([vy_value], dtype=np.float32), dtype=wp.float32, device=device)
    x_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
    y_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
    vx_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
    vy_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)

    wp.launch(
    kernel=init_projectile_kernel,
    dim=1,
    inputs=[],
    outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp, final_init_vx_wp, final_init_vy_wp],
    device=device,
    )
    wp.launch(
    kernel=projectile_step_kernel,
    dim=proj_steps,
    inputs=[proj_dt, proj_g],
    outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp],
    device=device,
    )
    wp.synchronize()

    x_path = x_hist_wp.numpy()
    y_path = y_hist_wp.numpy()

    fig = plt.figure(figsize=(15, 4))

    ax1 = fig.add_subplot(1, 3, 1)
    ax1.plot(loss_history)
    ax1.set_title(“Optimization loss”)
    ax1.set_xlabel(“Iteration”)
    ax1.set_ylabel(“Squared distance”)

    ax2 = fig.add_subplot(1, 3, 2)
    ax2.plot(vx_history, label=”vx”)
    ax2.plot(vy_history, label=”vy”)
    ax2.set_title(“Learned initial velocity”)
    ax2.set_xlabel(“Iteration”)
    ax2.legend()

    ax3 = fig.add_subplot(1, 3, 3)
    ax3.plot(x_path, y_path, linewidth=2)
    ax3.scatter([target_x], [target_y], s=80, marker=”x”)
    ax3.set_title(“Differentiable projectile trajectory”)
    ax3.set_xlabel(“x”)
    ax3.set_ylabel(“y”)
    ax3.set_ylim(-0.1, max(1.0, float(np.max(y_path)) + 0.3))

    plt.tight_layout()
    plt.show()

    final_dx = float(x_path[-1] – target_x)
    final_dy = float(y_path[-1] – target_y)
    final_dist = math.sqrt(final_dx * final_dx + final_dy * final_dy)
    print(f”Final target miss distance: {final_dist:.6f}”)
    print(f”Optimized initial velocity: vx={vx_value:.6f}, vy={vy_value:.6f}”)



    Source link

    ledger
    Share. Facebook Twitter Pinterest LinkedIn Tumblr Email
    CryptoExpert
    • Website

    I’m someone who’s deeply curious about crypto and artificial intelligence. I created this site to share what I’m learning, break down complex ideas, and keep people updated on what’s happening in crypto and AI—without the unnecessary hype.

    Related Posts

    U.S. Holds Off on New AI Chip Export Rules in Surprise Move in Tech Export Wars

    March 16, 2026

    Can AI help predict which heart-failure patients will worsen within a year? | MIT News

    March 15, 2026

    NanoClaw and Docker partner to make sandboxes the safest way for enterprises to deploy AI agents

    March 14, 2026

    How multi-agent AI economics influence business automation

    March 13, 2026
    Add A Comment
    Leave A Reply Cancel Reply

    changelly
    Latest Posts

    Bitmine Immersion Technologies Crosses 4.59 million ETH After Purchasing 60,999 Tokens in a Single Week

    March 17, 2026

    DAOs May Need To Ditch Decentralization To Court Institutions

    March 17, 2026

    Bitcoin Bollinger Bands Setting Up BTC Price for “Powerful Move”

    March 17, 2026

    Stocks Settle Sharply Higher as Crude Oil Slumps

    March 17, 2026

    How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels

    March 17, 2026
    bybit
    LEGAL INFORMATION
    • Privacy Policy
    • Terms Of Service
    • Social Media Disclaimer
    • DMCA Compliance
    • Anti-Spam Policy
    Top Insights

    Wife Uses CCTV To Pocket $176 Million

    March 17, 2026

    10 High Income Skills For Your 20s That AI Won’t Replace

    March 17, 2026
    kraken
    Facebook X (Twitter) Instagram Pinterest
    © 2026 DeepTechLedger.com - All rights reserved.

    Type above and press Enter to search. Press Esc to cancel.