Versione probabilmente non funzionante con CUDA

main
Luca Lombardo 6 months ago
parent b54ac113dd
commit d4f6dbc2c0

123
Cargo.lock generated

@ -938,6 +938,52 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96a6ac251f4a2aca6b3f91340350eab87ae57c3f127ffeb585e92bd336717991"
[[package]]
name = "cust"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d6cc71911e179f12483b9734120b45bd00bf64fab085cc4818428523eedd469"
dependencies = [
"bitflags 1.3.2",
"bytemuck",
"cust_core",
"cust_derive",
"cust_raw",
"find_cuda_helper",
]
[[package]]
name = "cust_core"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "039f79662cb8f890cbf335e818cd522d6e3a53fe63f61d1aaaf859cd3d975f06"
dependencies = [
"cust_derive",
"glam 0.20.5",
"mint",
"vek",
]
[[package]]
name = "cust_derive"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a3bc95fe629aed92b2423de6ccff9e40174b21d19cb6ee6281a4d04ac72f66"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "cust_raw"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbf40d6ade12cb9828bbc844b9875c7b93d25e67a3c9bf61c7aa3ae09e402bf8"
dependencies = [
"find_cuda_helper",
]
[[package]]
name = "derivative"
version = "2.2.0"
@ -1313,12 +1359,21 @@ version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2417e237094dfd115a4aa11b2a3dc6cda45e8d36572a7862ba9a48402f7441a3"
dependencies = [
"glam",
"glam 0.21.3",
"hashlink",
"petgraph",
"quad-rand",
]
[[package]]
name = "find_cuda_helper"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9f9e65c593dd01ac77daad909ea4ad17f0d6d1776193fc8ea766356177abdad"
dependencies = [
"glob",
]
[[package]]
name = "fixedbitset"
version = "0.4.2"
@ -1491,12 +1546,27 @@ dependencies = [
"xml-rs",
]
[[package]]
name = "glam"
version = "0.20.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f43e957e744be03f5801a55472f593d43fabdebf25a4585db250f04d86b1675f"
dependencies = [
"num-traits",
]
[[package]]
name = "glam"
version = "0.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "518faa5064866338b013ff9b2350dc318e14cc4fcd6cb8206d7e7c9886c98815"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "glow"
version = "0.13.1"
@ -1641,6 +1711,22 @@ dependencies = [
"rayon",
]
[[package]]
name = "graphs_1-cuda"
version = "0.1.0"
dependencies = [
"asd",
"cust",
"fdg",
"macroquad",
"nalgebra",
"num-traits",
"petgraph",
"petgraph-gen",
"rand",
"rayon",
]
[[package]]
name = "hashbrown"
version = "0.13.2"
@ -1923,7 +2009,7 @@ checksum = "6d983c03e9171099b2eb2067206c78cb37904ea071bb9ea4602c43ed45bb7097"
dependencies = [
"bumpalo",
"fontdue",
"glam",
"glam 0.21.3",
"image",
"macroquad_macro",
"miniquad",
@ -2026,6 +2112,12 @@ dependencies = [
"simd-adler32",
]
[[package]]
name = "mint"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e53debba6bda7a793e5f99b8dacf19e626084f525f7829104ba9898f367d85ff"
[[package]]
name = "naga"
version = "0.19.2"
@ -2734,6 +2826,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.37.27"
@ -2810,6 +2911,12 @@ dependencies = [
"tiny-skia",
]
[[package]]
name = "semver"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "serde"
version = "1.0.202"
@ -3241,6 +3348,18 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "vek"
version = "0.15.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8085882662f9bc47fc8b0cdafa5e19df8f592f650c02b9083da8d45ac9eebd17"
dependencies = [
"approx",
"num-integer",
"num-traits",
"rustc_version",
]
[[package]]
name = "version_check"
version = "0.9.4"

@ -0,0 +1,18 @@
[package]
name = "graphs_1-cuda"
version = "0.1.0"
edition = "2021"
[dependencies]
nalgebra = { version = "0.32.3", features = ["rand"] }
petgraph = { version = "0.6.4", features = [
"stable_graph",
], default-features = false }
num-traits = "0.2.17"
rand = "0.8.5"
macroquad = "0.4.4"
petgraph-gen = "0.1.3"
fdg = { git = "https://github.com/grantshandy/fdg" }
asd = { path = "../../" }
rayon = "1.10.0"
cust = "0.3.2"

@ -0,0 +1,48 @@
extern "C" __global__ void compute_forces(
float *positions_x, float *positions_y,
float *forces_x, float *forces_y,
int *neighbors, float *distances,
int num_nodes, int max_neighbors)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_nodes)
{
float pos_x = positions_x[idx];
float pos_y = positions_y[idx];
float force_x = 0.0f;
float force_y = 0.0f;
for (int i = 0; i < max_neighbors; i++)
{
int other_idx = neighbors[idx * max_neighbors + i];
if (other_idx == -1)
break; // No more neighbors
float other_pos_x = positions_x[other_idx];
float other_pos_y = positions_y[other_idx];
float distance = distances[idx * max_neighbors + i];
float delta_x = other_pos_x - pos_x;
float delta_y = other_pos_y - pos_y;
float dist = delta_x * delta_x + delta_y * delta_y;
float correction = dist - (distance * distance);
if (distance > 0.0f && dist > 1e-6f)
{
float scale = 0.01f * atanf(correction) / sqrtf(dist);
force_x += delta_x * scale;
force_y += delta_y * scale;
}
if (dist > 1e-6f)
{
float repel_scale = 0.01f / max(dist, 1.0f);
force_x -= delta_x * repel_scale;
force_y -= delta_y * repel_scale;
}
}
forces_x[idx] = force_x;
forces_y[idx] = force_y;
}
}

@ -0,0 +1,160 @@
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-30672275
// Cuda compilation tools, release 11.5, V11.5.119
// Based on NVVM 7.0.1
//
.version 7.5
.target sm_52
.address_size 64
// .globl compute_forces
.visible .entry compute_forces(
.param .u64 compute_forces_param_0,
.param .u64 compute_forces_param_1,
.param .u64 compute_forces_param_2,
.param .u64 compute_forces_param_3,
.param .u64 compute_forces_param_4,
.param .u64 compute_forces_param_5,
.param .u32 compute_forces_param_6,
.param .u32 compute_forces_param_7
)
{
.reg .pred %p<12>;
.reg .f32 %f<69>;
.reg .b32 %r<18>;
.reg .b64 %rd<26>;
ld.param.u64 %rd9, [compute_forces_param_0];
ld.param.u64 %rd10, [compute_forces_param_1];
ld.param.u64 %rd5, [compute_forces_param_2];
ld.param.u64 %rd6, [compute_forces_param_3];
ld.param.u64 %rd7, [compute_forces_param_4];
ld.param.u64 %rd8, [compute_forces_param_5];
ld.param.u32 %r8, [compute_forces_param_6];
ld.param.u32 %r7, [compute_forces_param_7];
cvta.to.global.u64 %rd1, %rd10;
cvta.to.global.u64 %rd2, %rd9;
mov.u32 %r9, %ntid.x;
mov.u32 %r10, %ctaid.x;
mov.u32 %r11, %tid.x;
mad.lo.s32 %r1, %r10, %r9, %r11;
setp.ge.s32 %p1, %r1, %r8;
@%p1 bra $L__BB0_12;
setp.lt.s32 %p2, %r7, 1;
mov.f32 %f67, 0f00000000;
mov.f32 %f68, %f67;
@%p2 bra $L__BB0_11;
cvta.to.global.u64 %rd3, %rd8;
cvta.to.global.u64 %rd4, %rd7;
mul.wide.s32 %rd11, %r1, 4;
add.s64 %rd12, %rd2, %rd11;
ld.global.f32 %f1, [%rd12];
add.s64 %rd13, %rd1, %rd11;
ld.global.f32 %f2, [%rd13];
mul.lo.s32 %r2, %r1, %r7;
mov.u32 %r17, 0;
$L__BB0_3:
add.s32 %r4, %r17, %r2;
mul.wide.s32 %rd14, %r4, 4;
add.s64 %rd15, %rd4, %rd14;
ld.global.u32 %r5, [%rd15];
setp.eq.s32 %p3, %r5, -1;
@%p3 bra $L__BB0_11;
mul.wide.s32 %rd16, %r5, 4;
add.s64 %rd17, %rd2, %rd16;
ld.global.f32 %f26, [%rd17];
sub.f32 %f5, %f26, %f1;
add.s64 %rd18, %rd1, %rd16;
ld.global.f32 %f27, [%rd18];
sub.f32 %f6, %f27, %f2;
mul.f32 %f28, %f6, %f6;
fma.rn.f32 %f7, %f5, %f5, %f28;
add.s64 %rd20, %rd3, %rd14;
ld.global.f32 %f29, [%rd20];
mul.f32 %f30, %f29, %f29;
sub.f32 %f8, %f7, %f30;
setp.leu.f32 %p4, %f29, 0f00000000;
setp.leu.f32 %p5, %f7, 0f358637BD;
or.pred %p6, %p4, %p5;
@%p6 bra $L__BB0_8;
abs.f32 %f9, %f8;
setp.leu.f32 %p7, %f9, 0f3F800000;
mov.f32 %f62, %f9;
@%p7 bra $L__BB0_7;
rcp.rn.f32 %f62, %f9;
$L__BB0_7:
mul.rn.f32 %f31, %f62, %f62;
mov.f32 %f32, 0fC0B59883;
mov.f32 %f33, 0fBF52C7EA;
fma.rn.f32 %f34, %f31, %f33, %f32;
mov.f32 %f35, 0fC0D21907;
fma.rn.f32 %f36, %f34, %f31, %f35;
mul.f32 %f37, %f31, %f36;
mul.f32 %f38, %f62, %f37;
add.f32 %f39, %f31, 0f41355DC0;
mov.f32 %f40, 0f41E6BD60;
fma.rn.f32 %f41, %f39, %f31, %f40;
mov.f32 %f42, 0f419D92C8;
fma.rn.f32 %f43, %f41, %f31, %f42;
rcp.rn.f32 %f44, %f43;
fma.rn.f32 %f45, %f38, %f44, %f62;
mov.f32 %f46, 0f3FC90FDB;
sub.f32 %f47, %f46, %f45;
setp.gt.f32 %p8, %f9, 0f3F800000;
selp.f32 %f48, %f47, %f45, %p8;
mov.b32 %r13, %f48;
mov.b32 %r14, %f8;
and.b32 %r15, %r14, -2147483648;
or.b32 %r16, %r15, %r13;
mov.b32 %f49, %r16;
setp.le.f32 %p9, %f9, 0f7F800000;
selp.f32 %f50, %f49, %f48, %p9;
mul.f32 %f51, %f50, 0f3C23D70A;
sqrt.rn.f32 %f52, %f7;
div.rn.f32 %f53, %f51, %f52;
fma.rn.f32 %f67, %f5, %f53, %f67;
fma.rn.f32 %f68, %f6, %f53, %f68;
$L__BB0_8:
@%p5 bra $L__BB0_10;
mov.f32 %f54, 0f3F800000;
max.f32 %f55, %f7, %f54;
mov.f32 %f56, 0f3C23D70A;
div.rn.f32 %f57, %f56, %f55;
mul.f32 %f58, %f5, %f57;
sub.f32 %f67, %f67, %f58;
mul.f32 %f59, %f6, %f57;
sub.f32 %f68, %f68, %f59;
$L__BB0_10:
add.s32 %r17, %r17, 1;
setp.lt.s32 %p11, %r17, %r7;
@%p11 bra $L__BB0_3;
$L__BB0_11:
cvta.to.global.u64 %rd21, %rd6;
cvta.to.global.u64 %rd22, %rd5;
mul.wide.s32 %rd23, %r1, 4;
add.s64 %rd24, %rd22, %rd23;
st.global.f32 [%rd24], %f67;
add.s64 %rd25, %rd21, %rd23;
st.global.f32 [%rd25], %f68;
$L__BB0_12:
ret;
}

@ -0,0 +1,223 @@
use cust::prelude::*;
use std::fs::File;
use std::io::Read;
async fn main() {
// Initialize CUDA context
let _ctx = cust::quick_init().unwrap();
// Load the CUDA module
let mut file = File::open("kernel.ptx").unwrap();
let mut ptx = String::new();
file.read_to_string(&mut ptx).unwrap();
let module = Module::load_from_string(&ptx).unwrap();
let stream = Stream::new(StreamFlags::DEFAULT, None).unwrap();
let mut graph = load_graph();
let mut desired_distance_matrix = HashMap::new();
graph.node_indices().for_each(|idx| {
desired_distance_matrix.insert(idx, dijkstra(&graph, idx, None, |_| 1.0 as f32));
});
loop {
let num_nodes = graph.node_count();
let max_neighbors = 10; // Adjust based on your data
// Prepare data for GPU
let positions_x: Vec<f32> = graph.node_weights().map(|(_, pos)| pos.x).collect();
let positions_y: Vec<f32> = graph.node_weights().map(|(_, pos)| pos.y).collect();
let mut forces_x = vec![0.0f32; num_nodes];
let mut forces_y = vec![0.0f32; num_nodes];
let neighbors = vec![-1; num_nodes * max_neighbors]; // Placeholder for neighbors
let distances = vec![0.0f32; num_nodes * max_neighbors]; // Placeholder for distances
// TODO: Fill neighbors and distances arrays based on your graph structure
// Allocate device memory
let positions_x_device = positions_x.as_slice().as_dvec().unwrap();
let positions_y_device = positions_y.as_slice().as_dvec().unwrap();
let forces_x_device = forces_x.as_mut_slice().as_dvec().unwrap();
let forces_y_device = forces_y.as_mut_slice().as_dvec().unwrap();
let neighbors_device = neighbors.as_slice().as_dvec().unwrap();
let distances_device = distances.as_slice().as_dvec().unwrap();
// Launch the CUDA kernel
unsafe {
launch!(module.compute_forces<<<num_nodes / 256 + 1, 256, 0, stream>>>(
positions_x_device.as_device_ptr(),
positions_y_device.as_device_ptr(),
forces_x_device.as_device_ptr(),
forces_y_device.as_device_ptr(),
neighbors_device.as_device_ptr(),
distances_device.as_device_ptr(),
num_nodes as i32,
max_neighbors as i32
))
.unwrap();
}
stream.synchronize().unwrap();
// Copy results back to host
forces_x_device.copy_to(&mut forces_x).unwrap();
forces_y_device.copy_to(&mut forces_y).unwrap();
// Update node positions
for (i, (force_x, force_y)) in forces_x.iter().zip(forces_y.iter()).enumerate() {
let (_, pos) = graph.node_weight_mut(NodeIndex::new(i)).unwrap();
pos.x += force_x;
pos.y += force_y;
}
// Render
let now = Instant::now();
clear_background(WHITE);
draw_graph(&graph);
let elapsed = now.elapsed();
println!("frame: {:?}", elapsed);
next_frame().await;
}
}
fn load_graph() -> StableGraph<(String, Point2<f32>), ()> {
println!("Loading graph");
let mut graph = StableGraph::new();
let file = std::fs::File::open(env::args().nth(1).expect("missing gfa file argument")).unwrap();
let entries = parser::parse_source(file).unwrap();
let mut index_map = HashMap::new();
let node_count = entries
.iter()
.filter_map(|entry| match entry {
Entry::Segment { id, .. } => Some(id),
_ => None,
})
.count();
println!("Node count: {}", node_count);
let radius = (node_count as f32).sqrt();
let mut i = -10.0;
for entry in entries
.iter()
.filter(|entry| matches!(entry, Entry::Link { .. }))
.take(3000)
{
// println!("{:?}", entry);
if let Entry::Link {
from,
from_orient,
to,
to_orient,
} = entry
{
// add first node if not present
let a = index_map
.entry(from.clone())
.or_insert_with(|| {
i += 1.0;
graph.add_node((
format!("{}{}", from, from_orient),
Point2::new(rand::gen_range(0.0, radius), rand::gen_range(0.0, radius)),
// Point2::new(i, 50.0 + rand::gen_range(0.0, 100.0)),
))
})
.to_owned();
// add second node if not present
let b = index_map
.entry(to.clone())
.or_insert_with(|| {
i += 1.0;
graph.add_node((
format!("{}{}", from, to_orient),
Point2::new(rand::gen_range(0.0, radius), rand::gen_range(0.0, radius)),
// Point2::new(i, 50.0 + rand::gen_range(0.0, 100.0)),
))
})
.to_owned();
graph.add_edge(a, b, ());
graph.add_edge(b, a, ());
}
}
println!("Loading completed");
graph
}
fn draw_graph(graph: &StableGraph<(String, Point2<f32>), ()>) {
let (width, height) = (screen_width(), screen_height());
let (min_x, max_x) = graph
.node_weights()
.map(|(_, pos)| pos.x)
.fold((f32::INFINITY, f32::NEG_INFINITY), |(min, max), x| {
(min.min(x), max.max(x))
});
let (min_y, max_y) = graph
.node_weights()
.map(|(_, pos)| pos.y)
.fold((f32::INFINITY, f32::NEG_INFINITY), |(min, max), y| {
(min.min(y), max.max(y))
});
let source_range: f32 = (max_x - min_x).max(max_y - min_y);
for idx in graph.edge_indices() {
let ((_, source), (_, target)) = graph
.edge_endpoints(idx)
.map(|(a, b)| (graph.node_weight(a).unwrap(), graph.node_weight(b).unwrap()))
.unwrap();
draw_line(
remap(source.x, min_x, min_x + source_range, 10.0, width - 10.0),
remap(source.y, min_y, min_y + source_range, 10.0, height - 10.0),
remap(target.x, min_x, min_x + source_range, 10.0, width - 10.0),
remap(target.y, min_y, min_y + source_range, 10.0, height - 10.0),
1.0,
BLACK,
);
}
for (_label, pos) in graph.node_weights() {
let x = remap(pos.x, min_x, min_x + source_range, 10.0, width - 10.0);
let y = remap(pos.y, min_y, min_y + source_range, 10.0, height - 10.0);
draw_circle(x, y, 2.0, RED);
// draw_text(label.as_str(), x - 30.0, y - 30.0, 10.0, BLACK);
}
draw_line(
remap(0.0, min_x, min_x + source_range, 10.0, width - 10.0),
remap(0.0, min_y, min_y + source_range, 10.0, height - 10.0),
remap(100.0, min_x, min_x + source_range, 10.0, width - 10.0),
remap(0.0, min_y, min_y + source_range, 10.0, height - 10.0),
2.0,
BLUE,
);
draw_line(
remap(0.0, min_x, min_x + source_range, 10.0, width - 10.0),
remap(0.0, min_y, min_y + source_range, 10.0, height - 10.0),
remap(0.0, min_x, min_x + source_range, 10.0, width - 10.0),
remap(100.0, min_y, min_y + source_range, 10.0, height - 10.0),
2.0,
BLUE,
);
}
fn remap(value: f32, from_min: f32, from_max: f32, to_min: f32, to_max: f32) -> f32 {
(value - from_min) / (from_max - from_min) * (to_max - to_min) + to_min
}
Loading…
Cancel
Save