>I can offer you to write the low levels ASM functions. Nice ThanksBut I dont have a Vampire yet I will buy a standalone as soon as available Anyway I would be intellectualy interested about you optimize a 3D rendering function I have already wrote about the Cow3D vampire version with low fpu usage (I mean not for drawing) EXTERNAL LINK EXTERNAL LINK In this program the most crucial part is in FillPoly function and especially in the fill an horizontal segment loop that start with NLOOP(dx) and end with LineDone: This is my horrible low level C that looks like ASM ;-) /*==================================================================*/ #define ASRGBA64(ptr) (*((double*)ptr)) #define ASRGBA32(ptr) (*(( ULONG*)ptr)) #define ASRGB16(ptr) (*(( UWORD*)ptr)) #define MUL8(src,dst,c,sa,da) dst[c]=( ((ULONG) ( ((UWORD)src[c])*sa + ((UWORD)dst[c])*da ))>>8) #define MULCOL(c,sa,da) MUL8(T0.B.RGBA,D0.B.RGBA,c,sa,da) #define MULRGB(sa,da) if(sa==255) {D0.L.RGBA32=T0.L.RGBA32;} else { MULCOL(0,sa,da); MULCOL(1,sa,da); MULCOL(2,sa,da); } #define MOD8(src,dst,c) dst[c]=( ((ULONG) ( ((UWORD)src[c])*((UWORD)dst[c]) ))>>8) #define MODCOL(c) MOD8(C0.B.RGBA,T0.B.RGBA,c) #define MODRGBA { MODCOL(0); MODCOL(1); MODCOL(2); MODCOL(3); } #define SRC_A (T0.B.RGBA[3]) /*==================================================================*/ void FillPoly(struct Context3D *Ctx,struct FixPoint3D *Edge1,struct FixPoint3D *Edge2) { register LONG x,dx; register LONG z,dz; register LONG u,du; register LONG v,dv; register LONG w,dw; register LONG r,dr; register LONG g,dg; register LONG b,db; register LONG a,da; register ULONG m,n; register LONG y; register ULONG *Tex32; register ULONG *Dst32; register ULONG *Dst32X; register LONG *Zbuf32; register LONG *Zbuf32X; register ULONG sline; register ULONG dline; register union Rgba3D T0; /* texel */ register union Rgba3D D0; /* destination pixel from screen */ register union Rgba3D C0; /* current gouraud */ //..............|.................................. // .............|..#@.............................. // ..Out........|.#..@..........Clipped Triangle... // ..of screen..|#....@.........to fill............ // .............x#.....@........between edges...... // ............x.#......@.......................... // ...........x..#.......@......................... // ..........x...#........@........................ // .........x....#.Edge1...@.Edge2................. // ..........x...#..........@...........In Screen.. // ............ x#...........@..................... // .............|.#...........@.................... // .............|....#.........@................... // .........Clip|......#........@.................. // -------------|---------x------x-------Clip------ // .............|............x....x................ // .............|...............x..x...Out......... // .............|..................xx..of screen... //..............|.................................. FUNC y=Ctx->PolyY; Edge1=&Edge1[y]; Edge2=&Edge2[y]; /* Init Texture & Screen pointers */ Tex32=(ULONG*)Ctx->T->pixels; sline=Ctx->T->w; Dst32=(ULONG*)Ctx->pixels; dline=Ctx->w; Zbuf32=(LONG*)Ctx->zbuffer; /* for poly height fill each horizontal segments */ MLOOP(Ctx->PolyHigh) { x =(Edge1->x); /* first-pixel x */ dx=(Edge2->x - Edge1->x)+1; /* horizontal segment size in pixels */ if(dx < 1) goto LineDone; /* get first-pixel value for each "channels" */ z=(Edge1->z); u=(Edge1->u); v=(Edge1->v); w=(Edge1->w); r=(Edge1->r); g=(Edge1->g); b=(Edge1->b); a=(Edge1->a); /* compute linear distance (delta) for each "channels" among x */ dz=(((Edge2->z>>16) - (Edge1->z>>16))<<16)/dx; du=(((Edge2->u>>16) - (Edge1->u>>16))<<16)/dx; dv=(((Edge2->v>>16) - (Edge1->v>>16))<<16)/dx; dw=(((Edge2->w>>16) - (Edge1->w>>16))<<16)/dx; dr=(((Edge2->r>>16) - (Edge1->r>>16))<<16)/dx; dg=(((Edge2->g>>16) - (Edge1->g>>16))<<16)/dx; db=(((Edge2->b>>16) - (Edge1->b>>16))<<16)/dx; da=(((Edge2->a>>16) - (Edge1->a>>16))<<16)/dx; Dst32X =&Dst32 [y*dline + x]; /* Pointer on the start of the segment on screen */ Zbuf32X=&Zbuf32[y*dline + x]; /* Pointer on the start of the segment on zbuffer*/ /* fill an horizontal segment */ NLOOP(dx) { if(z < Zbuf32X[n]) /* do a W3D_Z_LESS test */ { Zbuf32X[n]=z; T0.L.RGBA32=Tex32[ (v>>16)*sline + (u>>16)]; /* get texel */ if(C.UseGouraud) { C0.L.RGBA32=((r>>16)<<24)+((g>>16)<<16)+((b>>16)<<8)+((a>>16)); /* get current gouraud color as RGBA32*/ MODRGBA /* modulate effect for Texture/Color */ } if(SRC_A) /* if texel alpha (else is invisible for alpha==0) */ { D0.L.RGBA32=Dst32X[n]; MULRGB(SRC_A,(255-SRC_A)); /* do alpha transparency for Texture/Screen */ Dst32X[n]=D0.L.RGBA32; } } /* next pixel: new values for each "channels" */ z=z+dz; u=u+du; v=v+dv; w=w+dw; r=r+dr; g=g+dg; b=b+db; a=a+da; } LineDone: Edge1++; /* next line of poly */ Edge2++; y++; } }
|