LIVE NEWS
  • AI costs how much? GitHub Copilot users react to new usage-based pricing system.
  • European Parliament committee votes to scrap US tariffs
  • Hints and Solutions for June 2
  • It has the highest levels of toxic Pfas in drinking water in Scotland. But how did this remote island become awash with forever chemicals? | Pfas
  • For veterans, a place where peace can take root : NPR
  • This common amino acid helped mice survive deadly inflammation
  • Apple Will Reportedly Add Bill-Splitting Feature to iOS 27
  • Opinion | Putin Has No Good Way Out of His War
Prime Reports
  • Home
  • Popular Now
  • Crypto
  • Cybersecurity
  • Economy
  • Geopolitics
  • Global Markets
  • Politics
  • See More
    • Artificial Intelligence
    • Climate Risks
    • Defense
    • Healthcare Innovation
    • Science
    • Technology
    • World
Prime Reports
  • Home
  • Popular Now
  • Crypto
  • Cybersecurity
  • Economy
  • Geopolitics
  • Global Markets
  • Politics
  • Artificial Intelligence
  • Climate Risks
  • Defense
  • Healthcare Innovation
  • Science
  • Technology
  • World
Home»Artificial Intelligence»How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels
Artificial Intelligence

How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels

primereportsBy primereportsMarch 17, 2026No Comments3 Mins Read
Share Facebook Twitter Pinterest LinkedIn Tumblr Reddit Telegram Email
How to Build High-Performance GPU-Accelerated Simulations and Differentiable Physics Workflows Using NVIDIA Warp Kernels
Share
Facebook Twitter LinkedIn Pinterest Email


angles = np.linspace(0.0, 2.0 * np.pi, n_particles, endpoint=False, dtype=np.float32)
px0_np = 0.4 * np.cos(angles).astype(np.float32)
py0_np = (0.7 + 0.15 * np.sin(angles)).astype(np.float32)
vx0_np = (-0.8 * np.sin(angles)).astype(np.float32)
vy0_np = (0.8 * np.cos(angles)).astype(np.float32)


px0_wp = wp.array(px0_np, dtype=wp.float32, device=device)
py0_wp = wp.array(py0_np, dtype=wp.float32, device=device)
vx0_wp = wp.array(vx0_np, dtype=wp.float32, device=device)
vy0_wp = wp.array(vy0_np, dtype=wp.float32, device=device)


state_size = (steps + 1) * n_particles
px_wp = wp.empty(state_size, dtype=wp.float32, device=device)
py_wp = wp.empty(state_size, dtype=wp.float32, device=device)
vx_wp = wp.empty(state_size, dtype=wp.float32, device=device)
vy_wp = wp.empty(state_size, dtype=wp.float32, device=device)


wp.launch(
   kernel=init_particles_kernel,
   dim=n_particles,
   inputs=[n_particles, px0_wp, py0_wp, vx0_wp, vy0_wp],
   outputs=[px_wp, py_wp, vx_wp, vy_wp],
   device=device,
)


wp.launch(
   kernel=simulate_particles_kernel,
   dim=steps * n_particles,
   inputs=[n_particles, dt, gravity, damping, bounce, radius],
   outputs=[px_wp, py_wp, vx_wp, vy_wp],
   device=device,
)
wp.synchronize()


px_traj = px_wp.numpy().reshape(steps + 1, n_particles)
py_traj = py_wp.numpy().reshape(steps + 1, n_particles)


sample_ids = np.linspace(0, n_particles - 1, 16, dtype=int)
plt.figure(figsize=(8, 6))
for idx in sample_ids:
   plt.plot(px_traj[:, idx], py_traj[:, idx], linewidth=1.5)
plt.axhline(radius, linestyle="--")
plt.xlim(-1.05, 1.05)
plt.ylim(0.0, 1.25)
plt.title(f"Warp particle trajectories on {device}")
plt.xlabel("x")
plt.ylabel("y")
plt.show()


proj_steps = 180
proj_dt = np.float32(0.025)
proj_g = np.float32(-9.8)
target_x = np.float32(3.8)
target_y = np.float32(0.0)


vx_value = np.float32(2.0)
vy_value = np.float32(6.5)
lr = 0.08
iters = 60


loss_history = []
vx_history = []
vy_history = []


for it in range(iters):
   init_vx_wp = wp.array(np.array([vx_value], dtype=np.float32), dtype=wp.float32, device=device, requires_grad=True)
   init_vy_wp = wp.array(np.array([vy_value], dtype=np.float32), dtype=wp.float32, device=device, requires_grad=True)


   x_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
   y_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
   vx_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
   vy_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device, requires_grad=True)
   loss_wp = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)


   tape = wp.Tape()
   with tape:
       wp.launch(
           kernel=init_projectile_kernel,
           dim=1,
           inputs=[],
           outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp, init_vx_wp, init_vy_wp],
           device=device,
       )
       wp.launch(
           kernel=projectile_step_kernel,
           dim=proj_steps,
           inputs=[proj_dt, proj_g],
           outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp],
           device=device,
       )
       wp.launch(
           kernel=projectile_loss_kernel,
           dim=1,
           inputs=[proj_steps, target_x, target_y],
           outputs=[x_hist_wp, y_hist_wp, loss_wp],
           device=device,
       )


   tape.backward(loss=loss_wp)
   wp.synchronize()


   current_loss = float(loss_wp.numpy()[0])
   grad_vx = float(init_vx_wp.grad.numpy()[0])
   grad_vy = float(init_vy_wp.grad.numpy()[0])


   vx_value = np.float32(vx_value - lr * grad_vx)
   vy_value = np.float32(vy_value - lr * grad_vy)


   loss_history.append(current_loss)
   vx_history.append(float(vx_value))
   vy_history.append(float(vy_value))


   if it % 10 == 0 or it == iters - 1:
       print(f"iter={it:02d} loss={current_loss:.6f} vx={vx_value:.4f} vy={vy_value:.4f} grad=({grad_vx:.4f}, {grad_vy:.4f})")


final_init_vx_wp = wp.array(np.array([vx_value], dtype=np.float32), dtype=wp.float32, device=device)
final_init_vy_wp = wp.array(np.array([vy_value], dtype=np.float32), dtype=wp.float32, device=device)
x_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
y_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
vx_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)
vy_hist_wp = wp.zeros(proj_steps + 1, dtype=wp.float32, device=device)


wp.launch(
   kernel=init_projectile_kernel,
   dim=1,
   inputs=[],
   outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp, final_init_vx_wp, final_init_vy_wp],
   device=device,
)
wp.launch(
   kernel=projectile_step_kernel,
   dim=proj_steps,
   inputs=[proj_dt, proj_g],
   outputs=[x_hist_wp, y_hist_wp, vx_hist_wp, vy_hist_wp],
   device=device,
)
wp.synchronize()


x_path = x_hist_wp.numpy()
y_path = y_hist_wp.numpy()


fig = plt.figure(figsize=(15, 4))


ax1 = fig.add_subplot(1, 3, 1)
ax1.plot(loss_history)
ax1.set_title("Optimization loss")
ax1.set_xlabel("Iteration")
ax1.set_ylabel("Squared distance")


ax2 = fig.add_subplot(1, 3, 2)
ax2.plot(vx_history, label="vx")
ax2.plot(vy_history, label="vy")
ax2.set_title("Learned initial velocity")
ax2.set_xlabel("Iteration")
ax2.legend()


ax3 = fig.add_subplot(1, 3, 3)
ax3.plot(x_path, y_path, linewidth=2)
ax3.scatter([target_x], [target_y], s=80, marker="x")
ax3.set_title("Differentiable projectile trajectory")
ax3.set_xlabel("x")
ax3.set_ylabel("y")
ax3.set_ylim(-0.1, max(1.0, float(np.max(y_path)) + 0.3))


plt.tight_layout()
plt.show()


final_dx = float(x_path[-1] - target_x)
final_dy = float(y_path[-1] - target_y)
final_dist = math.sqrt(final_dx * final_dx + final_dy * final_dy)
print(f"Final target miss distance: {final_dist:.6f}")
print(f"Optimized initial velocity: vx={vx_value:.6f}, vy={vy_value:.6f}")

Share. Facebook Twitter Pinterest LinkedIn Tumblr Email
Previous ArticleQ&A: Why does gas set the price of electricity – and is there an alternative?
Next Article Venezuela defeats Italy, sets up WBC final against US | Baseball News
primereports
  • Website

Related Posts

Artificial Intelligence

Hints and Solutions for June 2

June 2, 2026
Artificial Intelligence

Flowise’s MCP implementation can run ghost commands

June 2, 2026
Artificial Intelligence

Dell Makes The Profits Up In Volume For Booming AI Servers

June 2, 2026
Add A Comment
Leave A Reply Cancel Reply

Top Posts

Paxton’s win over Cornyn sets up high-stakes Texas clash with Talarico

May 28, 202616 Views

Global Resources Outlook 2024 | UNEP

December 6, 202510 Views

Texas Democrat Talarico claims voting laws are rigged ahead of Paxton race

May 28, 20269 Views
Stay In Touch
  • Facebook
  • YouTube
  • TikTok
  • WhatsApp
  • Twitter
  • Instagram
Latest Reviews

Subscribe to Updates

Get the latest tech news from FooBar about tech, design and biz.

PrimeReports.org
Independent global news, analysis & insights.

PrimeReports.org brings you in-depth coverage of geopolitics, markets, technology and risk – with context that helps you understand what really matters.

Editorially independent · Opinions are those of the authors and not investment advice.
Facebook X (Twitter) LinkedIn YouTube
Key Sections
  • World
  • Geopolitics
  • Popular Now
  • Artificial Intelligence
  • Cybersecurity
  • Crypto
All Categories
  • Artificial Intelligence
  • Climate Risks
  • Crypto
  • Cybersecurity
  • Defense
  • Economy
  • Geopolitics
  • Global Markets
  • Healthcare Innovation
  • Politics
  • Popular Now
  • Science
  • Technology
  • World
  • About Us
  • Contact Us
  • Privacy Policy
  • Terms & Conditions
  • Disclaimer
  • Cookie Policy
  • DMCA / Copyright Notice
  • Editorial Policy

Sign up for Prime Reports Briefing – essential stories and analysis in your inbox.

By subscribing you agree to our Privacy Policy. You can opt out anytime.
Latest Stories
  • AI costs how much? GitHub Copilot users react to new usage-based pricing system.
  • European Parliament committee votes to scrap US tariffs
  • Hints and Solutions for June 2
© 2026 PrimeReports.org. All rights reserved.
Privacy Terms Contact

Type above and press Enter to search. Press Esc to cancel.