|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "fungi.hpp"
|
|
|
#include "fungi_Paremetres.hpp"
|
|
|
#include <cuda_runtime.h>
|
|
|
#include <curand_kernel.h>
|
|
|
#include <algorithm>
|
|
|
#include <random>
|
|
|
#include <numeric>
|
|
|
#include <cmath>
|
|
|
#include <stdexcept>
|
|
|
|
|
|
// Converts a failed CUDA runtime status into a C++ exception.
//   st  : status code returned by a CUDA runtime call
//   msg : short caller-supplied label naming the operation
// Does nothing when st == cudaSuccess; otherwise throws std::runtime_error whose
// text is "[CUDA] <msg>: <cudaGetErrorString(st)>".
static inline void CK(cudaError_t st, const char* msg){
    if (st == cudaSuccess) {
        return;
    }
    std::string what("[CUDA] ");
    what += msg;
    what += ": ";
    what += cudaGetErrorString(st);
    throw std::runtime_error(what);
}
|
|
|
|
|
|
|
|
|
// Evaluates a rotated, anisotropic Gaussian-shaped weight at integer pixel (x, y).
//   cx, cy : centre of the footprint
//   s      : scale; both axis weights are divided by s*s
//   a      : anisotropy; one rotated axis is weighted by a, the other by 1/a
//   th     : rotation angle handed to cosf/sinf
// Returns __expf(-q), q being half of the weighted squared offset in the rotated frame.
// No guard is applied for s == 0 or a == 0.
__device__ inline float d_phi_gauss(int x, int y, float cx, float cy,
                                    float s, float a, float th){
    const float off_x = x - cx;
    const float off_y = y - cy;

    const float cos_t = cosf(th);
    const float sin_t = sinf(th);

    // Per-axis quadratic weights.
    const float w_u = a/(s*s);
    const float w_v = (1.f/a)/(s*s);

    // Offset expressed in the rotated frame.
    const float u = cos_t*off_x + sin_t*off_y;
    const float v = -sin_t*off_x + cos_t*off_y;

    const float half_quad = 0.5f*(u*u*w_u + v*v*w_v);
    return __expf(-half_quad);
}
|
|
|
|
|
|
|
|
|
// Per-fungus reward kernel. Each thread derives one fungus index from
// blockIdx.x/blockDim.x/threadIdx.x and exits when that index is >= F.
// For its fungus the thread visits every pixel of the H x W map (row-major,
// element y*W + x) and accumulates d_phi_gauss(...) * |grad| into R[h].
//   grad            : H*W input values
//   fx,fy,fs,fa,fth : per-fungus centre x/y, scale, anisotropy and angle (F entries each)
//   R               : output, F entries
__global__ void k_reward_map(const float* grad, int H,int W,
                             const float* fx,const float* fy,const float* fs,
                             const float* fa,const float* fth,
                             float* R, int F){
    const int fungus = blockIdx.x*blockDim.x + threadIdx.x;
    if (fungus >= F) return;

    const float centre_x = fx[fungus];
    const float centre_y = fy[fungus];
    const float scale    = fs[fungus];
    const float aniso    = fa[fungus];
    const float angle    = fth[fungus];

    float total = 0.f;
    for (int row = 0; row < H; ++row){
        const float* grad_row = grad + row*W;
        for (int col = 0; col < W; ++col){
            const float weight = d_phi_gauss(col, row, centre_x, centre_y, scale, aniso, angle);
            total += weight * fabsf(grad_row[col]);
        }
    }
    R[fungus] = +total;
}
|
|
|
|
|
|
|
|
|
|
|
|
// Pairwise attraction kernel: thread i (exits when i >= F) loops over every other
// fungus j and accumulates an acceleration for fungus i.
//   fx, fy : positions (F entries)
//   mass   : masses (F entries); the thread's own mass is floored at 1e-3
//   ax, ay : output accelerations (F entries)
//   G      : force constant
//   eps2   : value added to the squared distance before rsqrtf
// Per pair: Fg = G * mi * mass[j] / r^3 (r^2 includes eps2), contribution = Fg * d / mi.
__global__ void k_gravity_forces(const float* fx,const float* fy,const float* mass,
                                 float* ax,float* ay, int F, float G, float eps2){
    const int self = blockIdx.x*blockDim.x + threadIdx.x;
    if (self >= F) return;

    const float pos_x  = fx[self];
    const float pos_y  = fy[self];
    const float m_self = fmaxf(mass[self], 1e-3f);

    float acc_x = 0.f;
    float acc_y = 0.f;

    for (int other = 0; other < F; ++other){
        if (other != self){
            const float sep_x  = fx[other] - pos_x;
            const float sep_y  = fy[other] - pos_y;
            const float dist2  = sep_x*sep_x + sep_y*sep_y + eps2;
            const float inv_d  = rsqrtf(dist2);
            const float inv_d3 = inv_d*inv_d*inv_d;
            const float force  = G * m_self * mass[other] * inv_d3;
            acc_x += force * sep_x / m_self;
            acc_y += force * sep_y / m_self;
        }
    }

    ax[self] = acc_x;
    ay[self] = acc_y;
}
|
|
|
|
|
|
|
|
|
// Integrates one motion step for fungus i (thread exits when i >= F).
//   fx, fy : positions, updated in place
//   vx, vy : velocities, updated in place; either pointer may be null, in which
//            case that component starts at 0 and is not written back
//   ax, ay : accelerations (read only)
//   W, H   : positions are kept inside [0, W-1] x [0, H-1]
//   dt     : step size; damp : factor applied to the updated velocity
// A position that leaves the range is set to the boundary and the matching
// velocity component is negated.
__global__ void k_apply_motion(float* fx,float* fy,
                               float* vx,float* vy,
                               const float* ax,const float* ay,
                               int F, int W,int H, float dt, float damp){
    const int i = blockIdx.x*blockDim.x + threadIdx.x;
    if (i >= F) return;

    float vel_x = 0.f;
    float vel_y = 0.f;
    if (vx) vel_x = vx[i];
    if (vy) vel_y = vy[i];

    // Velocity first, then position from the already-updated velocity.
    vel_x = (vel_x + dt*ax[i]) * damp;
    vel_y = (vel_y + dt*ay[i]) * damp;

    float pos_x = fx[i] + dt*vel_x;
    float pos_y = fy[i] + dt*vel_y;

    // Lower and upper checks are deliberately independent tests.
    if (pos_x < 0)   { pos_x = 0;   vel_x = -vel_x; }
    if (pos_x > W-1) { pos_x = W-1; vel_x = -vel_x; }
    if (pos_y < 0)   { pos_y = 0;   vel_y = -vel_y; }
    if (pos_y > H-1) { pos_y = H-1; vel_y = -vel_y; }

    fx[i] = pos_x;
    fy[i] = pos_y;
    if (vx) vx[i] = vel_x;
    if (vy) vy[i] = vel_y;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Energy bookkeeping for fungus h (thread exits when h >= F).
//   energy[h] <- energy[h]*decay + food*R[h] - cost*(1 + 0.01*fs[h]^2)
//   age[h]    <- age[h] + 1
//   fs[h]     <- max(1, fs[h]*(1 + 0.05*tanh(new energy)))
//   deathFlag[h] <- 1 when the new energy is below death_th, else 0
// fx, fy, fa, fth, a0, p0 and mass are accepted but not touched by this kernel.
__global__ void k_energy_growth_mark(float* fx,float* fy,float* fs,float* fa,float* fth,
                                     float* a0,float* p0,
                                     float* energy,float* mass,int* age,
                                     const float* R, int* deathFlag, int F,
                                     float food,float decay,float death_th,float cost){
    const int h = blockIdx.x*blockDim.x + threadIdx.x;
    if (h >= F) return;

    // The upkeep term uses the scale value as it was before this step's growth.
    const float sigma_in = fs[h];
    const float e = energy[h]*decay + food*R[h] - cost*(1.f + 0.01f*sigma_in*sigma_in);

    energy[h] = e;
    age[h] += 1;

    const float growth = tanhf(e);
    fs[h] = fmaxf(1.0f, sigma_in*(1.f + 0.05f*growth));

    if (e < death_th) {
        deathFlag[h] = 1;
    } else {
        deathFlag[h] = 0;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Crossover plus mutation for a single scalar gene.
// Picks `a` or `b` with equal probability (one uniform draw), then adds zero-mean
// normal noise with standard deviation `mut_std` (one normal draw).
// The uniform draw always happens before the normal draw.
static inline float _xover_mut(float a, float b, std::mt19937& rng, float mut_std){
    std::uniform_real_distribution<float> coin(0.f, 1.f);
    std::normal_distribution<float> noise(0.f, mut_std);

    float inherited;
    if (coin(rng) < 0.5f) {
        inherited = a;
    } else {
        inherited = b;
    }

    const float jitter = noise(rng);
    return inherited + jitter;
}
|
|
|
// Clamps v into [lo, hi]: the upper bound is applied first, then the lower bound.
static inline float _clip(float v, float lo, float hi){
    const float capped = (v < hi) ? v : hi;
    return (lo < capped) ? capped : lo;
}
|
|
|
|
|
|
|
|
|
// Appends one child of parents i and j (indices into P) to `out`.
//   P       : population the parent genes are read from
//   out     : population that receives the child; out.F grows by one and any
//             per-fungus vector shorter than the new out.F is resized to it
//   rng     : random source, consumed in the order x, y, sigma, alpha, theta,
//             a_base, p_base (two draws per gene, see _xover_mut)
//   mut_pos : mutation std-dev for x and y
//   mut_par : mutation std-dev for sigma
// x/y are clipped to out's grid, sigma to [1, 10], alpha to [0.3, 3]; theta,
// a_base and p_base are left unclipped. The child starts with energy 0, age 0
// and 30% of the parents' mean mass.
static void _spawn_child(const FungiSoA& P, int i, int j,
                         FungiSoA& out, std::mt19937& rng,
                         float mut_pos=0.5f, float mut_par=0.2f){
    const int slot = out.F;
    out.F = slot + 1;

    // Make sure every column has room for the new slot.
    for (std::vector<float>* column : { &out.x, &out.y, &out.sigma, &out.alpha, &out.theta,
                                        &out.a_base, &out.p_base, &out.energy, &out.mass }) {
        if ((int)column->size() < out.F) column->resize(out.F);
    }
    if ((int)out.age.size() < out.F) out.age.resize(out.F);

    // Crossover + mutation of one gene taken from the two parents.
    auto mix = [&](const std::vector<float>& gene, float mut_std) -> float {
        return _xover_mut(gene[i], gene[j], rng, mut_std);
    };

    const float max_x = (float)(out.W-1);
    const float max_y = (float)(out.H-1);

    out.x[slot]      = _clip(mix(P.x, mut_pos), 0.f, max_x);
    out.y[slot]      = _clip(mix(P.y, mut_pos), 0.f, max_y);
    out.sigma[slot]  = _clip(mix(P.sigma, mut_par), 1.0f, 10.0f);
    out.alpha[slot]  = _clip(mix(P.alpha, 0.1f), 0.3f, 3.0f);
    out.theta[slot]  = mix(P.theta, 0.2f);
    out.a_base[slot] = mix(P.a_base, 0.1f);
    out.p_base[slot] = mix(P.p_base, 0.1f);

    out.energy[slot] = 0.0f;
    out.mass[slot]   = 0.5f*(P.mass[i]+P.mass[j]) * 0.3f;
    out.age[slot]    = 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Host-side reproduction and culling pass over population P (modified in place).
//
// 1. Fungi are ranked by energy (highest first) and consecutive entries of that
//    ranking are considered as pairs. A pair reproduces when both energies are
//    positive and the parents are at most evo.pair_dist apart. At most
//    max(1, P.F/8) pairs reproduce, each producing evo.offspring_per_pair children.
// 2. Each reproducing parent pays a cost: mass *0.85, sigma *0.95, energy *0.7.
// 3. Individuals with energy < -2 or mass < 0.05 are removed.
// 4. If more than `cap` remain (evo.max_population when positive, otherwise
//    floor(1.5 * P.F)), only the `cap` highest-energy individuals are kept.
//
// The RNG is seeded from evo.seed ^ 0xBADC0DE on every call.
static void _pair_and_reproduce(FungiSoA& P, const EvoParams& evo){
    // Rank parents by energy, best first.
    std::vector<int> ids(P.F);
    std::iota(ids.begin(), ids.end(), 0);
    std::sort(ids.begin(), ids.end(), [&](int a,int b){ return P.energy[a] > P.energy[b]; });

    std::mt19937 rng(evo.seed ^ 0xBADC0DEu);

    // `out` receives the children and becomes the new population. Parents keep
    // their indices in it, because children are only ever appended.
    FungiSoA out = P;

    auto dist = [&](int i,int j){
        float dx=P.x[i]-P.x[j], dy=P.y[i]-P.y[j];
        return std::sqrt(dx*dx+dy*dy);
    };

    // Charges the reproduction cost to one parent. The cost is written to P (so
    // later children of the same parent see it) AND mirrored into `out`.
    // Previously only P was updated, and since P is replaced by `out` at the
    // end, the parents never actually paid.
    auto charge_parent = [&](int p){
        P.mass[p]   *= 0.85f;
        P.sigma[p]  *= 0.95f;
        P.energy[p] *= 0.7f;
        out.mass[p]   = P.mass[p];
        out.sigma[p]  = P.sigma[p];
        out.energy[p] = P.energy[p];
    };

    const int maxPairs = std::max(1, P.F/8);
    int made = 0;
    for (size_t idx=0; idx+1<ids.size() && made<maxPairs; ++idx){
        const int i = ids[idx], j = ids[idx+1];

        if (P.energy[i] <= 0.f || P.energy[j] <= 0.f) continue;
        if (dist(i,j) > evo.pair_dist) continue;

        for (int c=0; c<evo.offspring_per_pair; ++c){
            _spawn_child(P, i, j, out, rng);
        }

        charge_parent(i);
        charge_parent(j);
        made++;
    }

    // Survivors: drop anything starved or too light.
    std::vector<int> order;
    order.reserve(out.F);
    for (int h=0; h<out.F; ++h){
        const bool dead = (out.energy[h] < -2.0f || out.mass[h] < 0.05f);
        if (!dead) order.push_back(h);
    }

    // Population cap: keep the highest-energy individuals.
    const int cap = evo.max_population>0 ? evo.max_population
                                         : (int)std::floor(1.5 * (double)P.F);
    if ((int)order.size() > cap){
        std::sort(order.begin(), order.end(), [&](int a,int b){ return out.energy[a] > out.energy[b]; });
        order.resize(cap);
    }

    // Compact every column down to the surviving indices, in `order`.
    auto pick = [&](std::vector<float>& v){
        std::vector<float> nv; nv.reserve(order.size());
        for (int id: order) nv.push_back(v[id]);
        v.swap(nv);
    };
    auto picki = [&](std::vector<int>& v){
        std::vector<int> nv; nv.reserve(order.size());
        for (int id: order) nv.push_back(v[id]);
        v.swap(nv);
    };
    pick(out.x); pick(out.y); pick(out.sigma); pick(out.alpha); pick(out.theta);
    pick(out.a_base); pick(out.p_base); pick(out.energy); pick(out.mass); picki(out.age);
    out.F = (int)order.size();

    P = std::move(out);
}
|
|
|
|
|
|
|
|
|
// Runs one ecology step for `pop`:
//   1. uploads the population to freshly allocated device buffers,
//   2. k_reward_map        : per-fungus reward from d_grad_map (device pointer, H*W floats),
//   3. k_gravity_forces + k_apply_motion : one motion step; velocities start at
//      zero on every call because vx/vy are allocated and zeroed here,
//   4. k_energy_growth_mark: energy/age/sigma update and death flags,
//   5. downloads everything, removes flagged fungi on the host,
//   6. _pair_and_reproduce : host-side reproduction and culling.
// Returns immediately (without reproduction) when pop.F <= 0.
// Throws std::runtime_error (via CK) on any CUDA failure; device buffers are
// released on both the normal and the exceptional path.
void fungi_ecology_step(FungiSoA& pop, const float* d_grad_map, const EvoParams& evo){
    const int F=pop.F, H=pop.H, W=pop.W;
    if (F<=0) return;

    const size_t bytesF = sizeof(float)*F;
    const size_t bytesI = sizeof(int)*F;
    const int threads = 256;
    const int blocks  = (F + threads - 1)/threads;

    // All device pointers start null so the cleanup below is always safe:
    // cudaFree on a null pointer performs no operation.
    float *fx=nullptr,*fy=nullptr,*fs=nullptr,*fa=nullptr,*fth=nullptr;
    float *a0=nullptr,*p0=nullptr,*E=nullptr,*M=nullptr;
    int   *Age=nullptr;
    float *dR=nullptr;
    float *ax=nullptr,*ay=nullptr,*vx=nullptr,*vy=nullptr;
    int   *dDead=nullptr;

    auto release = [&](){
        cudaFree(dR); cudaFree(ax); cudaFree(ay); cudaFree(vx); cudaFree(vy); cudaFree(dDead);
        cudaFree(fx); cudaFree(fy); cudaFree(fs); cudaFree(fa); cudaFree(fth);
        cudaFree(a0); cudaFree(p0); cudaFree(E);  cudaFree(M);  cudaFree(Age);
    };

    auto allocF = [&](float*& p, const char* msg){ CK(cudaMalloc(&p, bytesF), msg); };
    auto h2dF = [&](float* d, const std::vector<float>& h, const char* msg){
        CK(cudaMemcpy(d, h.data(), bytesF, cudaMemcpyHostToDevice), msg);
    };
    auto d2hF = [&](std::vector<float>& h, const float* d, const char* msg){
        CK(cudaMemcpy(h.data(), d, bytesF, cudaMemcpyDeviceToHost), msg);
    };

    std::vector<int> hDead(F);

    try {
        // --- upload population -------------------------------------------------
        allocF(fx,"alloc fx");  allocF(fy,"alloc fy");  allocF(fs,"alloc fs");
        allocF(fa,"alloc fa");  allocF(fth,"alloc th"); allocF(a0,"alloc a0");
        allocF(p0,"alloc p0");  allocF(E,"alloc E");    allocF(M,"alloc M");
        CK(cudaMalloc(&Age,bytesI),"alloc Age");

        h2dF(fx, pop.x,     "H2D fx");
        h2dF(fy, pop.y,     "H2D fy");
        h2dF(fs, pop.sigma, "H2D fs");
        h2dF(fa, pop.alpha, "H2D fa");
        h2dF(fth,pop.theta, "H2D th");
        h2dF(a0, pop.a_base,"H2D a0");
        h2dF(p0, pop.p_base,"H2D p0");
        h2dF(E,  pop.energy,"H2D E");
        h2dF(M,  pop.mass,  "H2D M");
        CK(cudaMemcpy(Age,pop.age.data(), bytesI, cudaMemcpyHostToDevice),"H2D Age");

        // --- reward ------------------------------------------------------------
        allocF(dR,"alloc R");
        k_reward_map<<<blocks,threads>>>(d_grad_map, H,W, fx,fy,fs,fa,fth, dR, F);
        CK(cudaGetLastError(),"launch k_reward_map");

        // --- motion ------------------------------------------------------------
        allocF(ax,"alloc ax"); allocF(ay,"alloc ay");
        allocF(vx,"alloc vx"); allocF(vy,"alloc vy");
        CK(cudaMemset(vx,0,bytesF),"zero vx");
        CK(cudaMemset(vy,0,bytesF),"zero vy");

        k_gravity_forces<<<blocks,threads>>>(fx,fy,M, ax,ay, F, evo.G, evo.eps2);
        CK(cudaGetLastError(),"launch k_gravity_forces");
        k_apply_motion<<<blocks,threads>>>(fx,fy, vx,vy, ax,ay, F, W,H, evo.dt, evo.damp);
        CK(cudaGetLastError(),"launch k_apply_motion");

        // --- energy / death flags ---------------------------------------------
        CK(cudaMalloc(&dDead,bytesI),"alloc deadFlag");
        CK(cudaMemset(dDead,0,bytesI),"zero deadFlag");
        k_energy_growth_mark<<<blocks,threads>>>(fx,fy,fs,fa,fth, a0,p0, E,M,Age,
                                                 dR, dDead, F,
                                                 evo.food,evo.decay,evo.death_th,evo.cost);
        CK(cudaGetLastError(),"launch k_energy_growth_mark");

        // --- download (blocking copies; kernel execution errors surface here) --
        d2hF(pop.x,     fx, "D2H fx");
        d2hF(pop.y,     fy, "D2H fy");
        d2hF(pop.sigma, fs, "D2H fs");
        d2hF(pop.alpha, fa, "D2H fa");
        d2hF(pop.theta, fth,"D2H th");
        d2hF(pop.a_base,a0, "D2H a0");
        d2hF(pop.p_base,p0, "D2H p0");
        d2hF(pop.energy,E,  "D2H E");
        d2hF(pop.mass,  M,  "D2H M");
        CK(cudaMemcpy(pop.age.data(), Age,   bytesI, cudaMemcpyDeviceToHost),"D2H Age");
        CK(cudaMemcpy(hDead.data(),   dDead, bytesI, cudaMemcpyDeviceToHost),"D2H deadFlag");
    } catch (...) {
        release();   // do not leak device memory when a CUDA call fails
        throw;
    }
    release();

    // --- remove dead fungi on the host ----------------------------------------
    std::vector<int> keep; keep.reserve(pop.F);
    for (int i=0;i<pop.F;i++) if(!hDead[i]) keep.push_back(i);

    auto pick = [&](std::vector<float>& v){
        std::vector<float> nv; nv.reserve(keep.size());
        for(int id: keep) nv.push_back(v[id]);
        v.swap(nv);
    };
    auto picki=[&](std::vector<int>& v){
        std::vector<int> nv; nv.reserve(keep.size());
        for(int id: keep) nv.push_back(v[id]);
        v.swap(nv);
    };
    pick(pop.x); pick(pop.y); pick(pop.sigma); pick(pop.alpha); pick(pop.theta);
    pick(pop.a_base); pick(pop.p_base); pick(pop.energy); pick(pop.mass); picki(pop.age);
    pop.F = (int)keep.size();

    // --- reproduction ----------------------------------------------------------
    _pair_and_reproduce(pop, evo);
}
|
|
|
|
|
|
|
|
|
#include "fungi.hpp"
|
|
|
#include <cuda_runtime.h>
|
|
|
#include <curand_kernel.h>
|
|
|
#include <algorithm>
|
|
|
#include <numeric>
|
|
|
#include <cmath>
|
|
|
#include <stdexcept>
|
|
|
|
|
|
// Error-check helper for CUDA runtime calls.
// Returns silently on cudaSuccess; for any other status throws std::runtime_error
// with the text "[CUDA] <msg>: <cudaGetErrorString(st)>".
static inline void ck(cudaError_t st, const char* msg){
    if (st == cudaSuccess) {
        return;
    }
    std::string text = "[CUDA] ";
    text += msg;
    text += ": ";
    text += cudaGetErrorString(st);
    throw std::runtime_error(text);
}
|
|
|
|
|
|
// Sets the population size and grid dimensions and sizes every per-fungus array.
// x, y, sigma, alpha, theta, a_base and p_base are resized (existing leading
// values are kept); energy, mass and age are overwritten with 0, 1 and 0.
void FungiSoA::resize(int F_, int H_, int W_) {
    F = F_;
    H = H_;
    W = W_;

    for (std::vector<float>* column : { &x, &y, &sigma, &alpha, &theta, &a_base, &p_base }) {
        column->resize(F);
    }

    energy.assign(F, 0.f);
    mass.assign(F, 1.f);
    age.assign(F, 0);
}
|
|
|
// Fills the first F entries of the gene arrays with random values drawn from a
// std::mt19937 seeded with `seed`:
//   x, y   : uniform over [0, W-1] / [0, H-1]
//   sigma  : uniform between sigma_min and sigma_max
//   alpha  : uniform over [0.7, 1.3]
//   theta  : uniform over [-3.1415926, 3.1415926]
//   a_base, p_base : normal, mean 0, std-dev 0.15
// energy, mass and age are not touched.
//
// Degenerate inputs are handled the same way adjust_population handles them:
// a non-positive W or H yields coordinate 0 without consuming a random draw
// (the original built a distribution with upper bound -1, which violates the
// distribution's a <= b precondition). sigma_min/sigma_max are put in order
// before use for the same reason. For valid inputs the random stream is unchanged.
void FungiSoA::init_random(unsigned seed, float sigma_min, float sigma_max) {
    std::mt19937 rng(seed);

    const float x_hi = (float)(W > 0 ? W-1 : 0);
    const float y_hi = (float)(H > 0 ? H-1 : 0);
    const float s_lo = std::min(sigma_min, sigma_max);
    const float s_hi = std::max(sigma_min, sigma_max);

    std::uniform_real_distribution<float> Ux(0.f, x_hi);
    std::uniform_real_distribution<float> Uy(0.f, y_hi);
    std::uniform_real_distribution<float> Us(s_lo, s_hi);
    std::uniform_real_distribution<float> Ua(0.7f, 1.3f);
    std::uniform_real_distribution<float> Ut(-3.1415926f, 3.1415926f);
    std::normal_distribution<float> N01(0.f, 0.15f);

    // Draw order per fungus is fixed: x, y, sigma, alpha, theta, a_base, p_base.
    for (int i=0;i<F;i++){
        x[i] = (W > 0) ? Ux(rng) : 0.f;
        y[i] = (H > 0) ? Uy(rng) : 0.f;
        sigma[i] = Us(rng);
        alpha[i] = Ua(rng);
        theta[i] = Ut(rng);
        a_base[i] = N01(rng);
        p_base[i] = N01(rng);
    }
}
|
|
|
|
|
|
|
|
|
// Changes the population size to newF (values below 1 are raised to 1).
//   - newF == F : nothing happens.
//   - newF <  F : keeps the newF highest-energy fungi (selected with
//                 std::partial_sort, so they end up ordered by descending energy).
//   - newF >  F : keeps the existing fungi and appends random newcomers drawn
//                 from a std::mt19937 seeded with seed ^ newF. Newcomers get
//                 energy 0, mass 1, age 0.
void FungiSoA::adjust_population(int newF, unsigned seed) {
    if (newF < 1) newF = 1;
    if (newF == F) return;

    if (newF < F) {
        // ---- shrink: select the most energetic individuals -------------------
        std::vector<int> ranked(F);
        std::iota(ranked.begin(), ranked.end(), 0);
        std::partial_sort(ranked.begin(), ranked.begin() + newF, ranked.end(),
                          [this](int a, int b){ return energy[a] > energy[b]; });
        ranked.resize(newF);

        auto gatherF = [&ranked](std::vector<float>& column){
            std::vector<float> chosen(ranked.size());
            for (size_t k = 0; k < ranked.size(); ++k) chosen[k] = column[ranked[k]];
            column.swap(chosen);
        };
        auto gatherI = [&ranked](std::vector<int>& column){
            std::vector<int> chosen(ranked.size());
            for (size_t k = 0; k < ranked.size(); ++k) chosen[k] = column[ranked[k]];
            column.swap(chosen);
        };

        gatherF(x);      gatherF(y);      gatherF(sigma);  gatherF(alpha); gatherF(theta);
        gatherF(a_base); gatherF(p_base); gatherF(energy); gatherF(mass);
        gatherI(age);

        F = newF;
        return;
    }

    // ---- grow: append random newcomers ---------------------------------------
    const int firstNew = F;
    for (std::vector<float>* column : { &x, &y, &sigma, &alpha, &theta,
                                        &a_base, &p_base, &energy, &mass }) {
        column->resize(newF);
    }
    age.resize(newF);

    std::mt19937 rng(seed ^ (unsigned)newF);
    std::uniform_real_distribution<float> Ux(0.f, (float)(W>0?W-1:0));
    std::uniform_real_distribution<float> Uy(0.f, (float)(H>0?H-1:0));
    std::uniform_real_distribution<float> Us(1.5f, 6.5f);
    std::uniform_real_distribution<float> Ua(0.5f, 1.6f);
    std::uniform_real_distribution<float> Ut(-3.1415926f, 3.1415926f);
    std::normal_distribution<float> Namp(0.f, 0.3f);

    // Draw order per newcomer: x (only if W > 0), y (only if H > 0), sigma,
    // alpha, theta, a_base, p_base.
    for (int k = firstNew; k < newF; ++k) {
        if (W > 0) { x[k] = Ux(rng); } else { x[k] = 0.f; }
        if (H > 0) { y[k] = Uy(rng); } else { y[k] = 0.f; }
        sigma[k]  = Us(rng);
        alpha[k]  = Ua(rng);
        theta[k]  = Ut(rng);
        a_base[k] = Namp(rng);
        p_base[k] = Namp(rng);
        energy[k] = 0.f;
        mass[k]   = 1.f;
        age[k]    = 0;
    }

    F = newF;
}
|
|
|
|
|
|
|
|
|
// Fill kernel: the thread with global x-index i writes v to A[i] when i < N.
__global__ void k_clear(float* A, int N, float v=0.f){
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    if (idx >= N) return;
    A[idx] = v;
}
|
|
|
|
|
|
|
|
|
// Rotated anisotropic Gaussian-shaped weight of pixel (x, y) for a footprint
// centred at (cx, cy) with scale s, anisotropy a and rotation angle th.
// The offset is rotated by th, the two rotated components are weighted by
// a/(s*s) and (1/a)/(s*s), and the result is __expf(-0.5 * weighted sum of squares).
// No guard is applied for s == 0 or a == 0.
__device__ inline float phi_gauss(int x, int y, float cx, float cy, float s, float a, float th){
    const float delta_x = x - cx;
    const float delta_y = y - cy;

    const float cs = cosf(th);
    const float sn = sinf(th);

    const float weight_u = a/(s*s);
    const float weight_v = (1.f/a)/(s*s);

    const float rot_u = cs*delta_x + sn*delta_y;
    const float rot_v = -sn*delta_x + cs*delta_y;

    const float exponent = 0.5f*(rot_u*rot_u*weight_u + rot_v*rot_v*weight_v);
    return __expf(-exponent);
}
|
|
|
|
|
|
|
|
|
// Builds the amplitude mask A and phase mask P, one thread per pixel of the
// H x W grid (thread exits when its flat index is >= H*W; pixel = (idx % W, idx / W)).
// For each pixel the weights phi_gauss(...) of all F fungi are summed twice,
// once scaled by fa0[h] and once by fp0[h]. Then:
//   A[idx] = min(log1p(exp(min(sum_a, 2))), 2) + 1e-3
//   P[idx] = 3.1415926 * tanh(clamp(sum_p, -1, 1))
// Every element of A and P in [0, H*W) is written.
__global__ void k_build_masks(const float* fx,const float* fy,const float* fs,const float* fa,const float* fth,
                              const float* fa0,const float* fp0, int F,
                              float* A, float* P, int H, int W){
    const int pixel = blockIdx.x * blockDim.x + threadIdx.x;
    const int total = H*W;
    if (pixel >= total) return;

    const int py = pixel / W;
    const int px = pixel % W;

    float amp_sum   = 0.f;
    float phase_sum = 0.f;

    #pragma unroll 4
    for (int h = 0; h < F; ++h){
        const float weight = phi_gauss(px, py, fx[h], fy[h], fs[h], fa[h], fth[h]);
        amp_sum   += fa0[h] * weight;
        phase_sum += fp0[h] * weight;
    }

    // Amplitude: softplus of the (capped) sum, capped again, plus a small floor.
    const float softplus = log1pf(expf(fminf(amp_sum, 2.0f)));
    const float amplitude = fminf(softplus, 2.0f) + 1e-3f;

    // Phase: clamp the sum, squash with tanh, scale by the pi constant.
    const float clamped = fminf(fmaxf(phase_sum, -1.0f), 1.0f);
    const float phase = 3.1415926f * tanhf(clamped);

    A[pixel] = amplitude;
    P[pixel] = phase;
}
|
|
|
|
|
|
// Builds the amplitude (d_A) and phase (d_P) masks on the GPU from the host
// population `pop`.
//   pop      : host population; x, y, sigma, alpha, theta, a_base, p_base are uploaded
//   d_A, d_P : device buffers of pop.H * pop.W floats, fully overwritten
//   (int, int) : two unnamed, unused parameters kept for interface compatibility
// Throws std::runtime_error (via ck) on any CUDA failure. The temporary device
// buffers are released on both the normal and the exceptional path.
// Returns without touching d_A/d_P when pop.H * pop.W <= 0.
void fungi_build_masks_GPU(const FungiSoA& pop,
                           float* d_A, float* d_P,
                           int , int ) {
    const int N = pop.H*pop.W;
    if (N <= 0) return;   // a zero-block launch would be an invalid configuration

    const size_t bytes = sizeof(float)*pop.F;

    // Null-initialised so cleanup is always safe (cudaFree(nullptr) is a no-op).
    float *dx=nullptr,*dy=nullptr,*ds=nullptr,*da=nullptr,*dt=nullptr,*dab=nullptr,*dpb=nullptr;
    auto release = [&](){
        cudaFree(dx); cudaFree(dy); cudaFree(ds); cudaFree(da);
        cudaFree(dt); cudaFree(dab); cudaFree(dpb);
    };

    auto upload = [&](float*& dev, const std::vector<float>& host,
                      const char* allocMsg, const char* copyMsg){
        ck(cudaMalloc(&dev, bytes), allocMsg);
        ck(cudaMemcpy(dev, host.data(), bytes, cudaMemcpyHostToDevice), copyMsg);
    };

    try {
        upload(dx,  pop.x,      "alloc fx",  "H2D fx");
        upload(dy,  pop.y,      "alloc fy",  "H2D fy");
        upload(ds,  pop.sigma,  "alloc fs",  "H2D fs");
        upload(da,  pop.alpha,  "alloc fa",  "H2D fa");
        upload(dt,  pop.theta,  "alloc fth", "H2D fth");
        upload(dab, pop.a_base, "alloc a0",  "H2D a0");
        upload(dpb, pop.p_base, "alloc p0",  "H2D p0");

        // No separate clear pass: k_build_masks unconditionally writes every
        // element of d_A and d_P in [0, N).
        k_build_masks<<<(N+255)/256,256>>>(dx,dy,ds,da,dt, dab,dpb,pop.F, d_A,d_P, pop.H,pop.W);
        ck(cudaGetLastError(), "launch k_build_masks");

        // The inputs are freed right below, so wait for the kernel here and
        // report any execution error instead of dropping it.
        ck(cudaDeviceSynchronize(), "sync k_build_masks");
    } catch (...) {
        release();
        throw;
    }
    release();
}
|
|
|
|
|
|
|
|
|
// Initialises n Philox generator states: thread i (when i < n) calls
// curand_init(seed, i, 0, &st[i]), i.e. same seed, subsequence i, offset 0.
__global__ void k_rng_init(curandStatePhilox4_32_10_t* st, unsigned seed, int n){
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    if (idx >= n) return;
    curand_init(seed, idx, 0, &st[idx]);
}
|
|
|
|
|
|
|
|
|
// Reward kernel: thread h (exits when h >= F) walks the whole H x W map in
// row-major order and stores in R[h] the sum of phi_gauss(...) * |grad[y*W + x]|
// for fungus h.
//   grad            : H*W input values
//   fx,fy,fs,fa,fth : per-fungus centre x/y, scale, anisotropy, angle (F entries each)
//   R               : output, F entries
__global__ void k_reward(const float* grad, int H, int W,
                         const float* fx,const float* fy,const float* fs,const float* fa,const float* fth,
                         float* R, int F){
    const int who = blockIdx.x*blockDim.x + threadIdx.x;
    if (who >= F) return;

    const float mid_x = fx[who];
    const float mid_y = fy[who];
    const float spread = fs[who];
    const float stretch = fa[who];
    const float tilt = fth[who];

    float sum = 0.f;
    for (int row = 0; row < H; ++row){
        const float* line = grad + row*W;
        for (int col = 0; col < W; ++col){
            const float w = phi_gauss(col, row, mid_x, mid_y, spread, stretch, tilt);
            sum += w * fabsf(line[col]);
        }
    }
    R[who] = +sum;
}
|
|
|
|
|
|
// Per-fungus evolution step (thread h exits when h >= F). In order:
//   1. energy[h] <- energy[h]*decay + food*R[h] - cost*(1 + 0.01*fs[h]^2); age[h] += 1
//   2. fs[h]     <- max(1, fs[h]*(1 + 0.05*tanh(energy)))
//   3. position jitter: x and y each move by a uniform draw minus 0.5, then are
//      clamped to [0, W-1] / [0, H-1]
//   4. if the new energy is below death_th the fungus is re-seeded in place:
//      random position, scale, anisotropy and angle; a0/p0 are nudged; energy,
//      mass and age are reset to 0, 1, 0
// The thread's curand state is loaded from rng[h] and stored back at the end.
// Random draws happen in a fixed order: jitter x, jitter y, then (on death)
// x, y, scale, anisotropy, angle, a0, p0.
__global__ void k_evolve(float* fx,float* fy,float* fs,float* fa,float* fth,
                         float* a0,float* p0,float* energy,float* mass,int* age,
                         const float* R, int F, int H, int W,
                         float food,float decay,float death_th,float cost,
                         curandStatePhilox4_32_10_t* rng){
    const int h = blockIdx.x*blockDim.x + threadIdx.x;
    if (h >= F) return;

    curandStatePhilox4_32_10_t local = rng[h];

    // Energy balance; the upkeep term uses the scale from before this step.
    const float sigma_in = fs[h];
    const float e = energy[h]*decay + food*R[h] - cost*(1.f + 0.01f*sigma_in*sigma_in);
    energy[h] = e;
    age[h] += 1;

    // Growth or shrinkage of the footprint, never below 1.
    const float growth = tanhf(e);
    fs[h] = fmaxf(1.0f, sigma_in * (1.f + 0.05f*growth));

    // Small random walk, kept inside the grid.
    const float jitter_x = (curand_uniform(&local)-0.5f)*1.0f;
    const float jitter_y = (curand_uniform(&local)-0.5f)*1.0f;
    fx[h] = fminf(fmaxf(fx[h] + jitter_x, 0.f), (float)(W-1));
    fy[h] = fminf(fmaxf(fy[h] + jitter_y, 0.f), (float)(H-1));

    // Starved: replace with a fresh individual in the same slot.
    if (e < death_th){
        fx[h]  = curand_uniform(&local)*(W-1.f);
        fy[h]  = curand_uniform(&local)*(H-1.f);
        fs[h]  = 1.5f + 4.0f*curand_uniform(&local);
        fa[h]  = 0.8f + 0.6f*curand_uniform(&local);
        fth[h] = (curand_uniform(&local)*2.f-1.f)*3.1415926f;
        a0[h] += (curand_uniform(&local)-0.5f)*0.2f;
        p0[h] += (curand_uniform(&local)-0.5f)*0.2f;
        energy[h] = 0.f;
        mass[h]   = 1.f;
        age[h]    = 0;
    }

    rng[h] = local;
}
|
|
|
|
|
|
// Runs one GPU evolution step on `pop`: uploads the population, computes the
// per-fungus reward from d_grad_map (device pointer, pop.H * pop.W floats) with
// k_reward, initialises one Philox state per fungus from `seed`, runs k_evolve
// and downloads the result back into `pop`.
//   evo_pairs : accepted for interface compatibility, unused
//   food, decay, death_th, cost : forwarded to k_evolve
// Returns immediately when pop.F <= 0 (a zero-block launch is an invalid
// configuration). Throws std::runtime_error (via ck) on any CUDA failure;
// device buffers are released on both the normal and the exceptional path.
void fungi_evolve_GPU(FungiSoA& pop,
                      const float* d_grad_map,
                      int evo_pairs,
                      float food, float decay, float death_th,
                      float cost, unsigned seed){
    (void)evo_pairs;

    const int F = pop.F;
    if (F <= 0) return;

    const size_t bytesF = sizeof(float)*F;
    const size_t bytesI = sizeof(int)*F;
    const int threads = 256;
    const int blocks  = (F + threads - 1)/threads;

    // Null-initialised so cleanup is always safe (cudaFree(nullptr) is a no-op).
    float *fx=nullptr,*fy=nullptr,*fs=nullptr,*fa=nullptr,*fth=nullptr;
    float *a0=nullptr,*p0=nullptr,*E=nullptr,*M=nullptr;
    int   *Age=nullptr;
    float *dR=nullptr;
    curandStatePhilox4_32_10_t* rng=nullptr;

    auto release = [&](){
        cudaFree(dR); cudaFree(rng);
        cudaFree(fx); cudaFree(fy); cudaFree(fs); cudaFree(fa); cudaFree(fth);
        cudaFree(a0); cudaFree(p0); cudaFree(E);  cudaFree(M);  cudaFree(Age);
    };

    auto allocF = [&](float*& p, const char* msg){ ck(cudaMalloc(&p, bytesF), msg); };
    auto h2dF = [&](float* d, const std::vector<float>& h, const char* msg){
        ck(cudaMemcpy(d, h.data(), bytesF, cudaMemcpyHostToDevice), msg);
    };
    auto d2hF = [&](std::vector<float>& h, const float* d, const char* msg){
        ck(cudaMemcpy(h.data(), d, bytesF, cudaMemcpyDeviceToHost), msg);
    };

    try {
        // --- upload -----------------------------------------------------------
        allocF(fx,"alloc fx");  allocF(fy,"alloc fy");
        allocF(fs,"alloc fs");  allocF(fa,"alloc fa");
        allocF(fth,"alloc th"); allocF(a0,"alloc a0");
        allocF(p0,"alloc p0");  allocF(E,"alloc E");
        allocF(M,"alloc M");
        ck(cudaMalloc(&Age,bytesI),"alloc age");

        h2dF(fx, pop.x,     "H2D fx");
        h2dF(fy, pop.y,     "H2D fy");
        h2dF(fs, pop.sigma, "H2D fs");
        h2dF(fa, pop.alpha, "H2D fa");
        h2dF(fth,pop.theta, "H2D th");
        h2dF(a0, pop.a_base,"H2D a0");
        h2dF(p0, pop.p_base,"H2D p0");
        h2dF(E,  pop.energy,"H2D E");
        h2dF(M,  pop.mass,  "H2D M");
        ck(cudaMemcpy(Age,pop.age.data(), bytesI, cudaMemcpyHostToDevice),"H2D age");

        // --- reward -----------------------------------------------------------
        allocF(dR,"alloc R");
        k_reward<<<blocks,threads>>>(d_grad_map, pop.H,pop.W, fx,fy,fs,fa,fth, dR, F);
        ck(cudaGetLastError(),"launch k_reward");

        // --- evolve -----------------------------------------------------------
        ck(cudaMalloc(&rng,sizeof(curandStatePhilox4_32_10_t)*F),"alloc rng");
        k_rng_init<<<blocks,threads>>>(rng, seed, F);
        ck(cudaGetLastError(),"launch k_rng_init");
        k_evolve<<<blocks,threads>>>(fx,fy,fs,fa,fth, a0,p0,E,M,Age, dR, F, pop.H,pop.W,
                                     food,decay,death_th,cost, rng);
        ck(cudaGetLastError(),"launch k_evolve");

        // --- download (blocking copies; kernel execution errors surface here) -
        d2hF(pop.x,     fx, "D2H fx");
        d2hF(pop.y,     fy, "D2H fy");
        d2hF(pop.sigma, fs, "D2H fs");
        d2hF(pop.alpha, fa, "D2H fa");
        d2hF(pop.theta, fth,"D2H th");
        d2hF(pop.a_base,a0, "D2H a0");
        d2hF(pop.p_base,p0, "D2H p0");
        d2hF(pop.energy,E,  "D2H E");
        d2hF(pop.mass,  M,  "D2H M");
        ck(cudaMemcpy(pop.age.data(), Age, bytesI, cudaMemcpyDeviceToHost),"D2H age");
    } catch (...) {
        release();   // do not leak device memory when a CUDA call fails
        throw;
    }
    release();
}
|
|
|
|
|
|
// Copies HW floats from device buffer `d` into host buffer `h` with a blocking
// cudaMemcpy. Throws std::runtime_error (via ck, label "D2H mask") on failure.
void download_mask(float* h, const float* d, int HW){
    const size_t byteCount = sizeof(float)*HW;
    const cudaError_t status = cudaMemcpy(h, d, byteCount, cudaMemcpyDeviceToHost);
    ck(status, "D2H mask");
}
|
|
|
|
|
|
|
|
|
|
|
|
// Fills the host buffer h_pattern (H*W floats, row-major: element y*W + x) with
// a synthetic test image selected by pattern_type:
//   0 : checkerboard of 4x4-pixel cells (values 0 / 1)
//   1 : horizontal ramp from 0 at x = 0 to 1 at x = W-1
//   2 : concentric rings, 0.5 + 0.5*sin(0.5 * distance from (W/2, H/2))
//   3 : single pixel of value 1 at (W/2, H/2), 0 elsewhere
//   anything else : all zeros
// For pattern 1 with W == 1 the ramp has no extent, so the single column is set
// to 0 (the original divided 0 by 0 and produced NaN).
void fungi_create_test_pattern(float* h_pattern, int H, int W, int pattern_type) {
    const float centre_x = W/2.0f;
    const float centre_y = H/2.0f;

    for (int y = 0; y < H; y++) {
        for (int x = 0; x < W; x++) {
            float val = 0.0f;
            switch (pattern_type) {
                case 0:
                    val = ((x/4 + y/4) % 2) ? 1.0f : 0.0f;
                    break;
                case 1:
                    // Guard the degenerate one-column image against 0/0.
                    val = (W > 1) ? (float)x / (W-1) : 0.0f;
                    break;
                case 2: {
                    const float r = sqrtf((x-centre_x)*(x-centre_x) + (y-centre_y)*(y-centre_y));
                    val = 0.5f + 0.5f * sinf(r * 0.5f);
                    break;
                }
                case 3:
                    val = (x == W/2 && y == H/2) ? 1.0f : 0.0f;
                    break;
                default:
                    break;   // unknown pattern: leave the pixel at 0
            }
            h_pattern[y*W + x] = val;
        }
    }
}
|
|
|
|
|
|
// Downloads the amplitude mask d_A and phase mask d_P (HW floats each, device
// pointers) and prints min / max / mean for both, plus two sanity flags:
// amplitude max above 1.9, and phase outside [-3.2, 3.2].
// Throws std::runtime_error (via ck) if a device-to-host copy fails.
// With HW <= 0 there is nothing to analyse: the function prints a short note
// and returns (the original dereferenced the end iterator of an empty vector
// and divided by zero in that case).
void fungi_analyze_mask_statistics(const float* d_A, const float* d_P, int HW) {
    if (HW <= 0) {
        printf("🔍 FUNGI MASK STATISTICS:\n");
        printf(" (empty mask, nothing to analyze)\n");
        return;
    }

    std::vector<float> h_A(HW), h_P(HW);
    ck(cudaMemcpy(h_A.data(), d_A, sizeof(float)*HW, cudaMemcpyDeviceToHost), "D2H A");
    ck(cudaMemcpy(h_P.data(), d_P, sizeof(float)*HW, cudaMemcpyDeviceToHost), "D2H P");

    // Amplitude statistics.
    float A_min = *std::min_element(h_A.begin(), h_A.end());
    float A_max = *std::max_element(h_A.begin(), h_A.end());
    float A_mean = std::accumulate(h_A.begin(), h_A.end(), 0.0f) / HW;

    // Phase statistics.
    float P_min = *std::min_element(h_P.begin(), h_P.end());
    float P_max = *std::max_element(h_P.begin(), h_P.end());
    float P_mean = std::accumulate(h_P.begin(), h_P.end(), 0.0f) / HW;

    printf("🔍 FUNGI MASK STATISTICS:\n");
    printf(" Amplitude: min=%.3f, max=%.3f, mean=%.3f\n", A_min, A_max, A_mean);
    printf(" Phase: min=%.3f, max=%.3f, mean=%.3f\n", P_min, P_max, P_mean);
    printf(" A oversaturation: %s\n", (A_max > 1.9f) ? "⚠️ YES" : "✅ NO");
    printf(" P range check: %s\n", (P_max > 3.2f || P_min < -3.2f) ? "⚠️ OUT OF RANGE" : "✅ OK");
}
|
|
|
|