mirror of https://github.com/aziis98/asd-2024.git
Versione probabilmente non funzionante con CUDA
parent
b54ac113dd
commit
d4f6dbc2c0
@ -0,0 +1,18 @@
|
||||
[package]
|
||||
name = "graphs_1-cuda"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
nalgebra = { version = "0.32.3", features = ["rand"] }
|
||||
petgraph = { version = "0.6.4", features = [
|
||||
"stable_graph",
|
||||
], default-features = false }
|
||||
num-traits = "0.2.17"
|
||||
rand = "0.8.5"
|
||||
macroquad = "0.4.4"
|
||||
petgraph-gen = "0.1.3"
|
||||
fdg = { git = "https://github.com/grantshandy/fdg" }
|
||||
asd = { path = "../../" }
|
||||
rayon = "1.10.0"
|
||||
cust = "0.3.2"
|
@ -0,0 +1,48 @@
|
||||
extern "C" __global__ void compute_forces(
|
||||
float *positions_x, float *positions_y,
|
||||
float *forces_x, float *forces_y,
|
||||
int *neighbors, float *distances,
|
||||
int num_nodes, int max_neighbors)
|
||||
{
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < num_nodes)
|
||||
{
|
||||
float pos_x = positions_x[idx];
|
||||
float pos_y = positions_y[idx];
|
||||
float force_x = 0.0f;
|
||||
float force_y = 0.0f;
|
||||
|
||||
for (int i = 0; i < max_neighbors; i++)
|
||||
{
|
||||
int other_idx = neighbors[idx * max_neighbors + i];
|
||||
if (other_idx == -1)
|
||||
break; // No more neighbors
|
||||
|
||||
float other_pos_x = positions_x[other_idx];
|
||||
float other_pos_y = positions_y[other_idx];
|
||||
float distance = distances[idx * max_neighbors + i];
|
||||
|
||||
float delta_x = other_pos_x - pos_x;
|
||||
float delta_y = other_pos_y - pos_y;
|
||||
float dist = delta_x * delta_x + delta_y * delta_y;
|
||||
float correction = dist - (distance * distance);
|
||||
|
||||
if (distance > 0.0f && dist > 1e-6f)
|
||||
{
|
||||
float scale = 0.01f * atanf(correction) / sqrtf(dist);
|
||||
force_x += delta_x * scale;
|
||||
force_y += delta_y * scale;
|
||||
}
|
||||
|
||||
if (dist > 1e-6f)
|
||||
{
|
||||
float repel_scale = 0.01f / max(dist, 1.0f);
|
||||
force_x -= delta_x * repel_scale;
|
||||
force_y -= delta_y * repel_scale;
|
||||
}
|
||||
}
|
||||
|
||||
forces_x[idx] = force_x;
|
||||
forces_y[idx] = force_y;
|
||||
}
|
||||
}
|
@ -0,0 +1,160 @@
|
||||
//
|
||||
// Generated by NVIDIA NVVM Compiler
|
||||
//
|
||||
// Compiler Build ID: CL-30672275
|
||||
// Cuda compilation tools, release 11.5, V11.5.119
|
||||
// Based on NVVM 7.0.1
|
||||
//
|
||||
|
||||
.version 7.5
|
||||
.target sm_52
|
||||
.address_size 64
|
||||
|
||||
// .globl compute_forces
|
||||
|
||||
.visible .entry compute_forces(
|
||||
.param .u64 compute_forces_param_0,
|
||||
.param .u64 compute_forces_param_1,
|
||||
.param .u64 compute_forces_param_2,
|
||||
.param .u64 compute_forces_param_3,
|
||||
.param .u64 compute_forces_param_4,
|
||||
.param .u64 compute_forces_param_5,
|
||||
.param .u32 compute_forces_param_6,
|
||||
.param .u32 compute_forces_param_7
|
||||
)
|
||||
{
|
||||
.reg .pred %p<12>;
|
||||
.reg .f32 %f<69>;
|
||||
.reg .b32 %r<18>;
|
||||
.reg .b64 %rd<26>;
|
||||
|
||||
|
||||
ld.param.u64 %rd9, [compute_forces_param_0];
|
||||
ld.param.u64 %rd10, [compute_forces_param_1];
|
||||
ld.param.u64 %rd5, [compute_forces_param_2];
|
||||
ld.param.u64 %rd6, [compute_forces_param_3];
|
||||
ld.param.u64 %rd7, [compute_forces_param_4];
|
||||
ld.param.u64 %rd8, [compute_forces_param_5];
|
||||
ld.param.u32 %r8, [compute_forces_param_6];
|
||||
ld.param.u32 %r7, [compute_forces_param_7];
|
||||
cvta.to.global.u64 %rd1, %rd10;
|
||||
cvta.to.global.u64 %rd2, %rd9;
|
||||
mov.u32 %r9, %ntid.x;
|
||||
mov.u32 %r10, %ctaid.x;
|
||||
mov.u32 %r11, %tid.x;
|
||||
mad.lo.s32 %r1, %r10, %r9, %r11;
|
||||
setp.ge.s32 %p1, %r1, %r8;
|
||||
@%p1 bra $L__BB0_12;
|
||||
|
||||
setp.lt.s32 %p2, %r7, 1;
|
||||
mov.f32 %f67, 0f00000000;
|
||||
mov.f32 %f68, %f67;
|
||||
@%p2 bra $L__BB0_11;
|
||||
|
||||
cvta.to.global.u64 %rd3, %rd8;
|
||||
cvta.to.global.u64 %rd4, %rd7;
|
||||
mul.wide.s32 %rd11, %r1, 4;
|
||||
add.s64 %rd12, %rd2, %rd11;
|
||||
ld.global.f32 %f1, [%rd12];
|
||||
add.s64 %rd13, %rd1, %rd11;
|
||||
ld.global.f32 %f2, [%rd13];
|
||||
mul.lo.s32 %r2, %r1, %r7;
|
||||
mov.u32 %r17, 0;
|
||||
|
||||
$L__BB0_3:
|
||||
add.s32 %r4, %r17, %r2;
|
||||
mul.wide.s32 %rd14, %r4, 4;
|
||||
add.s64 %rd15, %rd4, %rd14;
|
||||
ld.global.u32 %r5, [%rd15];
|
||||
setp.eq.s32 %p3, %r5, -1;
|
||||
@%p3 bra $L__BB0_11;
|
||||
|
||||
mul.wide.s32 %rd16, %r5, 4;
|
||||
add.s64 %rd17, %rd2, %rd16;
|
||||
ld.global.f32 %f26, [%rd17];
|
||||
sub.f32 %f5, %f26, %f1;
|
||||
add.s64 %rd18, %rd1, %rd16;
|
||||
ld.global.f32 %f27, [%rd18];
|
||||
sub.f32 %f6, %f27, %f2;
|
||||
mul.f32 %f28, %f6, %f6;
|
||||
fma.rn.f32 %f7, %f5, %f5, %f28;
|
||||
add.s64 %rd20, %rd3, %rd14;
|
||||
ld.global.f32 %f29, [%rd20];
|
||||
mul.f32 %f30, %f29, %f29;
|
||||
sub.f32 %f8, %f7, %f30;
|
||||
setp.leu.f32 %p4, %f29, 0f00000000;
|
||||
setp.leu.f32 %p5, %f7, 0f358637BD;
|
||||
or.pred %p6, %p4, %p5;
|
||||
@%p6 bra $L__BB0_8;
|
||||
|
||||
abs.f32 %f9, %f8;
|
||||
setp.leu.f32 %p7, %f9, 0f3F800000;
|
||||
mov.f32 %f62, %f9;
|
||||
@%p7 bra $L__BB0_7;
|
||||
|
||||
rcp.rn.f32 %f62, %f9;
|
||||
|
||||
$L__BB0_7:
|
||||
mul.rn.f32 %f31, %f62, %f62;
|
||||
mov.f32 %f32, 0fC0B59883;
|
||||
mov.f32 %f33, 0fBF52C7EA;
|
||||
fma.rn.f32 %f34, %f31, %f33, %f32;
|
||||
mov.f32 %f35, 0fC0D21907;
|
||||
fma.rn.f32 %f36, %f34, %f31, %f35;
|
||||
mul.f32 %f37, %f31, %f36;
|
||||
mul.f32 %f38, %f62, %f37;
|
||||
add.f32 %f39, %f31, 0f41355DC0;
|
||||
mov.f32 %f40, 0f41E6BD60;
|
||||
fma.rn.f32 %f41, %f39, %f31, %f40;
|
||||
mov.f32 %f42, 0f419D92C8;
|
||||
fma.rn.f32 %f43, %f41, %f31, %f42;
|
||||
rcp.rn.f32 %f44, %f43;
|
||||
fma.rn.f32 %f45, %f38, %f44, %f62;
|
||||
mov.f32 %f46, 0f3FC90FDB;
|
||||
sub.f32 %f47, %f46, %f45;
|
||||
setp.gt.f32 %p8, %f9, 0f3F800000;
|
||||
selp.f32 %f48, %f47, %f45, %p8;
|
||||
mov.b32 %r13, %f48;
|
||||
mov.b32 %r14, %f8;
|
||||
and.b32 %r15, %r14, -2147483648;
|
||||
or.b32 %r16, %r15, %r13;
|
||||
mov.b32 %f49, %r16;
|
||||
setp.le.f32 %p9, %f9, 0f7F800000;
|
||||
selp.f32 %f50, %f49, %f48, %p9;
|
||||
mul.f32 %f51, %f50, 0f3C23D70A;
|
||||
sqrt.rn.f32 %f52, %f7;
|
||||
div.rn.f32 %f53, %f51, %f52;
|
||||
fma.rn.f32 %f67, %f5, %f53, %f67;
|
||||
fma.rn.f32 %f68, %f6, %f53, %f68;
|
||||
|
||||
$L__BB0_8:
|
||||
@%p5 bra $L__BB0_10;
|
||||
|
||||
mov.f32 %f54, 0f3F800000;
|
||||
max.f32 %f55, %f7, %f54;
|
||||
mov.f32 %f56, 0f3C23D70A;
|
||||
div.rn.f32 %f57, %f56, %f55;
|
||||
mul.f32 %f58, %f5, %f57;
|
||||
sub.f32 %f67, %f67, %f58;
|
||||
mul.f32 %f59, %f6, %f57;
|
||||
sub.f32 %f68, %f68, %f59;
|
||||
|
||||
$L__BB0_10:
|
||||
add.s32 %r17, %r17, 1;
|
||||
setp.lt.s32 %p11, %r17, %r7;
|
||||
@%p11 bra $L__BB0_3;
|
||||
|
||||
$L__BB0_11:
|
||||
cvta.to.global.u64 %rd21, %rd6;
|
||||
cvta.to.global.u64 %rd22, %rd5;
|
||||
mul.wide.s32 %rd23, %r1, 4;
|
||||
add.s64 %rd24, %rd22, %rd23;
|
||||
st.global.f32 [%rd24], %f67;
|
||||
add.s64 %rd25, %rd21, %rd23;
|
||||
st.global.f32 [%rd25], %f68;
|
||||
|
||||
$L__BB0_12:
|
||||
ret;
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,223 @@
|
||||
use cust::prelude::*;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
async fn main() {
|
||||
// Initialize CUDA context
|
||||
let _ctx = cust::quick_init().unwrap();
|
||||
|
||||
// Load the CUDA module
|
||||
let mut file = File::open("kernel.ptx").unwrap();
|
||||
let mut ptx = String::new();
|
||||
file.read_to_string(&mut ptx).unwrap();
|
||||
let module = Module::load_from_string(&ptx).unwrap();
|
||||
let stream = Stream::new(StreamFlags::DEFAULT, None).unwrap();
|
||||
|
||||
let mut graph = load_graph();
|
||||
|
||||
let mut desired_distance_matrix = HashMap::new();
|
||||
graph.node_indices().for_each(|idx| {
|
||||
desired_distance_matrix.insert(idx, dijkstra(&graph, idx, None, |_| 1.0 as f32));
|
||||
});
|
||||
|
||||
loop {
|
||||
let num_nodes = graph.node_count();
|
||||
let max_neighbors = 10; // Adjust based on your data
|
||||
|
||||
// Prepare data for GPU
|
||||
let positions_x: Vec<f32> = graph.node_weights().map(|(_, pos)| pos.x).collect();
|
||||
let positions_y: Vec<f32> = graph.node_weights().map(|(_, pos)| pos.y).collect();
|
||||
let mut forces_x = vec![0.0f32; num_nodes];
|
||||
let mut forces_y = vec![0.0f32; num_nodes];
|
||||
let neighbors = vec![-1; num_nodes * max_neighbors]; // Placeholder for neighbors
|
||||
let distances = vec![0.0f32; num_nodes * max_neighbors]; // Placeholder for distances
|
||||
|
||||
// TODO: Fill neighbors and distances arrays based on your graph structure
|
||||
|
||||
// Allocate device memory
|
||||
let positions_x_device = positions_x.as_slice().as_dvec().unwrap();
|
||||
let positions_y_device = positions_y.as_slice().as_dvec().unwrap();
|
||||
let forces_x_device = forces_x.as_mut_slice().as_dvec().unwrap();
|
||||
let forces_y_device = forces_y.as_mut_slice().as_dvec().unwrap();
|
||||
let neighbors_device = neighbors.as_slice().as_dvec().unwrap();
|
||||
let distances_device = distances.as_slice().as_dvec().unwrap();
|
||||
|
||||
// Launch the CUDA kernel
|
||||
unsafe {
|
||||
launch!(module.compute_forces<<<num_nodes / 256 + 1, 256, 0, stream>>>(
|
||||
positions_x_device.as_device_ptr(),
|
||||
positions_y_device.as_device_ptr(),
|
||||
forces_x_device.as_device_ptr(),
|
||||
forces_y_device.as_device_ptr(),
|
||||
neighbors_device.as_device_ptr(),
|
||||
distances_device.as_device_ptr(),
|
||||
num_nodes as i32,
|
||||
max_neighbors as i32
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
stream.synchronize().unwrap();
|
||||
|
||||
// Copy results back to host
|
||||
forces_x_device.copy_to(&mut forces_x).unwrap();
|
||||
forces_y_device.copy_to(&mut forces_y).unwrap();
|
||||
|
||||
// Update node positions
|
||||
for (i, (force_x, force_y)) in forces_x.iter().zip(forces_y.iter()).enumerate() {
|
||||
let (_, pos) = graph.node_weight_mut(NodeIndex::new(i)).unwrap();
|
||||
pos.x += force_x;
|
||||
pos.y += force_y;
|
||||
}
|
||||
|
||||
// Render
|
||||
let now = Instant::now();
|
||||
clear_background(WHITE);
|
||||
draw_graph(&graph);
|
||||
let elapsed = now.elapsed();
|
||||
println!("frame: {:?}", elapsed);
|
||||
next_frame().await;
|
||||
}
|
||||
}
|
||||
|
||||
fn load_graph() -> StableGraph<(String, Point2<f32>), ()> {
|
||||
println!("Loading graph");
|
||||
|
||||
let mut graph = StableGraph::new();
|
||||
|
||||
let file = std::fs::File::open(env::args().nth(1).expect("missing gfa file argument")).unwrap();
|
||||
let entries = parser::parse_source(file).unwrap();
|
||||
|
||||
let mut index_map = HashMap::new();
|
||||
|
||||
let node_count = entries
|
||||
.iter()
|
||||
.filter_map(|entry| match entry {
|
||||
Entry::Segment { id, .. } => Some(id),
|
||||
_ => None,
|
||||
})
|
||||
.count();
|
||||
|
||||
println!("Node count: {}", node_count);
|
||||
|
||||
let radius = (node_count as f32).sqrt();
|
||||
|
||||
let mut i = -10.0;
|
||||
|
||||
for entry in entries
|
||||
.iter()
|
||||
.filter(|entry| matches!(entry, Entry::Link { .. }))
|
||||
.take(3000)
|
||||
{
|
||||
// println!("{:?}", entry);
|
||||
|
||||
if let Entry::Link {
|
||||
from,
|
||||
from_orient,
|
||||
to,
|
||||
to_orient,
|
||||
} = entry
|
||||
{
|
||||
// add first node if not present
|
||||
let a = index_map
|
||||
.entry(from.clone())
|
||||
.or_insert_with(|| {
|
||||
i += 1.0;
|
||||
|
||||
graph.add_node((
|
||||
format!("{}{}", from, from_orient),
|
||||
Point2::new(rand::gen_range(0.0, radius), rand::gen_range(0.0, radius)),
|
||||
// Point2::new(i, 50.0 + rand::gen_range(0.0, 100.0)),
|
||||
))
|
||||
})
|
||||
.to_owned();
|
||||
|
||||
// add second node if not present
|
||||
let b = index_map
|
||||
.entry(to.clone())
|
||||
.or_insert_with(|| {
|
||||
i += 1.0;
|
||||
|
||||
graph.add_node((
|
||||
format!("{}{}", from, to_orient),
|
||||
Point2::new(rand::gen_range(0.0, radius), rand::gen_range(0.0, radius)),
|
||||
// Point2::new(i, 50.0 + rand::gen_range(0.0, 100.0)),
|
||||
))
|
||||
})
|
||||
.to_owned();
|
||||
|
||||
graph.add_edge(a, b, ());
|
||||
graph.add_edge(b, a, ());
|
||||
}
|
||||
}
|
||||
|
||||
println!("Loading completed");
|
||||
|
||||
graph
|
||||
}
|
||||
|
||||
fn draw_graph(graph: &StableGraph<(String, Point2<f32>), ()>) {
|
||||
let (width, height) = (screen_width(), screen_height());
|
||||
|
||||
let (min_x, max_x) = graph
|
||||
.node_weights()
|
||||
.map(|(_, pos)| pos.x)
|
||||
.fold((f32::INFINITY, f32::NEG_INFINITY), |(min, max), x| {
|
||||
(min.min(x), max.max(x))
|
||||
});
|
||||
|
||||
let (min_y, max_y) = graph
|
||||
.node_weights()
|
||||
.map(|(_, pos)| pos.y)
|
||||
.fold((f32::INFINITY, f32::NEG_INFINITY), |(min, max), y| {
|
||||
(min.min(y), max.max(y))
|
||||
});
|
||||
|
||||
let source_range: f32 = (max_x - min_x).max(max_y - min_y);
|
||||
|
||||
for idx in graph.edge_indices() {
|
||||
let ((_, source), (_, target)) = graph
|
||||
.edge_endpoints(idx)
|
||||
.map(|(a, b)| (graph.node_weight(a).unwrap(), graph.node_weight(b).unwrap()))
|
||||
.unwrap();
|
||||
|
||||
draw_line(
|
||||
remap(source.x, min_x, min_x + source_range, 10.0, width - 10.0),
|
||||
remap(source.y, min_y, min_y + source_range, 10.0, height - 10.0),
|
||||
remap(target.x, min_x, min_x + source_range, 10.0, width - 10.0),
|
||||
remap(target.y, min_y, min_y + source_range, 10.0, height - 10.0),
|
||||
1.0,
|
||||
BLACK,
|
||||
);
|
||||
}
|
||||
|
||||
for (_label, pos) in graph.node_weights() {
|
||||
let x = remap(pos.x, min_x, min_x + source_range, 10.0, width - 10.0);
|
||||
let y = remap(pos.y, min_y, min_y + source_range, 10.0, height - 10.0);
|
||||
|
||||
draw_circle(x, y, 2.0, RED);
|
||||
// draw_text(label.as_str(), x - 30.0, y - 30.0, 10.0, BLACK);
|
||||
}
|
||||
|
||||
draw_line(
|
||||
remap(0.0, min_x, min_x + source_range, 10.0, width - 10.0),
|
||||
remap(0.0, min_y, min_y + source_range, 10.0, height - 10.0),
|
||||
remap(100.0, min_x, min_x + source_range, 10.0, width - 10.0),
|
||||
remap(0.0, min_y, min_y + source_range, 10.0, height - 10.0),
|
||||
2.0,
|
||||
BLUE,
|
||||
);
|
||||
|
||||
draw_line(
|
||||
remap(0.0, min_x, min_x + source_range, 10.0, width - 10.0),
|
||||
remap(0.0, min_y, min_y + source_range, 10.0, height - 10.0),
|
||||
remap(0.0, min_x, min_x + source_range, 10.0, width - 10.0),
|
||||
remap(100.0, min_y, min_y + source_range, 10.0, height - 10.0),
|
||||
2.0,
|
||||
BLUE,
|
||||
);
|
||||
}
|
||||
|
||||
fn remap(value: f32, from_min: f32, from_max: f32, to_min: f32, to_max: f32) -> f32 {
|
||||
(value - from_min) / (from_max - from_min) * (to_max - to_min) + to_min
|
||||
}
|
Loading…
Reference in New Issue