#include "FragmentProgramARB10.h"
// NOTE(review): every bare `#include` directive in this file has no header
// name in this copy -- presumably the `<...>` names were lost when the file
// was extracted. They must be restored before the file can compile.
#include
#include
//using namespace std;

#if defined(__APPLE__)
// OS X
#define GL_EXT_vertex_shader 1
#define GL_GLEXT_FUNCTION_POINTERS 1
#include
#include
#include
#include "extensions.h"
#elif defined(sgi)
// IRIX
#include
#include
#include "extensions.h"
//#include
#elif defined(__linux__)
// LINUX
#include
#include
#include
#include "extensions.h"
#else
// WIN32
#define DECLARE_EXTENSION_SUBSTANCE
#include "extensions.h"
#include "glext.h"
#endif

// Extension entry points that are defined in another translation unit
// (Apple builds only).
// NOTE(review): pfglProgramStringARB is declared with the glGetProgramivARB
// pointer type -- looks like a copy/paste slip; confirm against the
// definition and the extension header.
#if __APPLE__
extern glGetProgramivARBProcPtr pfglGetProgramivARB;
extern glGenProgramsARBProcPtr pfglGenProgramsARB;
extern glBindProgramARBProcPtr pfglBindProgramARB;
extern glGetProgramivARBProcPtr pfglProgramStringARB;
#endif

// ---------------------------------------------------------------------------
// Generic ("STD") fragment program text, split into three pieces.
// Piece 0 opens the program ("!!ARBfp1.0" header, declarations, loading of
// the texture coordinate), piece 2 closes it (colour output and "END").
// Piece 1 holds the per-iteration instructions; presumably it is appended
// once per iteration by initializeGPU_FP() -- that part of the code is not
// readable in this copy, TODO confirm.
// ---------------------------------------------------------------------------
unsigned char fragProgram0STD_[] = {
    // Standard fragment program header
    "!!ARBfp1.0\n"
//  "PARAM C0 = { 1.0, 2.0, 1.0, 1.0 };"
//  "PARAM C1 = { 1.0, 1.0, 1.0, 1.0 };"
    "OUTPUT outcolor = result.color;"
    "TEMP R0, R1, R2;"
    "MOV R0, fragment.texcoord[0];"             // R0 = { cx, ?, ?, cy }
//  "MUL R2, R0, C1;"                           // R2 = { cx, ?, ?, 0.5*cy }
    "MOV R2, R0;"                               // R2 = { cx, ?, ?, cy }
};

// Iteration piece of the generic program (three instructions).
unsigned char fragProgram1STD_[] = {
    "MOV R0.z, -R0.w;"                          // R0 = { zx, ?, -zy, zy }
    "MAD R1, R0.xyzw, R0.xxxx, R2;"             // R1 = { zx2 + cx, ?, -zx*zy, zx*zy + 0.5*cy }
    "MAD R0.xyw, R0.wwwz, -R0.wwwx, R1.xyzw;"   // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
};

// Closing piece of the generic program: writes the output colour and ends
// the program.
unsigned char fragProgram2STD_[] = {
    //"MOV outcolor.xyz, R0;"
    "MAD outcolor.xyz, R0, R1, R2;"
    "END"
};

// Pieces currently selected for program generation. They start out pointing
// at the generic variant; initializeGPU_FP() repoints them at the R300
// variant when the GL vendor string contains "ATI".
unsigned char* fragProgram0_ = fragProgram0STD_;
unsigned char* fragProgram1_ = fragProgram1STD_;
unsigned char* fragProgram2_ = fragProgram2STD_;

// Test texture (not yet used)
// 4x4 texels, 3 bytes per texel; uploaded as GL_RGB by createTexture().
const unsigned char textureImage_[4*4*3] = {
    0,0,0,       255,255,255, 0,0,0,       255,255,255,
    255,0,255,   0,0,0,       255,255,255, 0,0,0,
    0,0,0,       255,255,255, 0,0,0,       255,255,255,
    255,255,255, 0,0,0,       255,255,255, 0,0,0,
};

// Benjamin Lipchak swizzle optimizations.
// GPU Fragment Program - D.Paccaloni & B.Lipchak -------------------
// The Radeon FP compiler seems to have some problems with complex swizzles, eating up too many ALU slots.
// That's why we have to use 3 instructions per iteration :(
// Note that for Fragment Programs to work, we don't have to enable GL_TEXTURE_2D !
//
// NOTE(review): the per-instruction register comments in the three R300
// arrays below appear to have been copied from the generic variant (they
// name R0/R1 and a 0.5*cy term that these instructions do not use); treat
// them as approximate.

// Opening piece of the R300 variant: header, declarations, and two
// unrolled three-instruction groups.
unsigned char fragProgram0R300_[] = {
    // Standard fragment program header
    "!!ARBfp1.0\n"
    "OUTPUT outcolor = result.color;\n"
    "TEMP R0, R1, R2, R3;\n"
    "MOV R2, fragment.texcoord[0];\n"               // R0 = { cx, ?, ?, cy }
    "\n"
    "MAD R1, R2.xyzw, R2.xxxx, R2;\n"               // R1 = { zx2 + cx, ?, -zx*zy, zx*zy + 0.5*cy }
    "MAD R3.xy, R2.wwww, -R2.wwww, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "MAD R3.w, -R2.wwww, -R2.xxxx, R1.wwww;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "\n"
    "MAD R1, R3.xyzw, R3.xxxx, R2;\n"               // R1 = { zx2 + cx, ?, -zx*zy, zx*zy + 0.5*cy }
    "MAD R0.xy, R3.wwww, -R3.wwww, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "MAD R0.w, -R3.wwww, -R3.xxxx, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "\n"
};

// Repeated piece of the R300 variant: two three-instruction groups
// (R0 -> R3, then R3 -> R0) per copy.
unsigned char fragProgram1R300_[] = {
    "MAD R1, R0.xyzw, R0.xxxx, R2;\n"               // R1 = { zx2 + cx, ?, -zx*zy, zx*zy + 0.5*cy }
    "MAD R3.xy, R0.wwww, -R0.wwww, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "MAD R3.w, -R0.wwww, -R0.xxxx, R1.wwww;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "\n"
    "MAD R1, R3.xyzw, R3.xxxx, R2;\n"               // R1 = { zx2 + cx, ?, -zx*zy, zx*zy + 0.5*cy }
    "MAD R0.xy, R3.wwww, -R3.wwww, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "MAD R0.w, -R3.wwww, -R3.xxxx, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "\n"
};

// Closing piece of the R300 variant: further MAD/MOV instructions, then the
// colour output and "END".
// NOTE(review): the third instruction uses R1.xyzw where the matching
// instruction in fragProgram1R300_ uses R1.wwww -- confirm this difference
// is intended.
unsigned char fragProgram2R300_[] = {
    "MAD R1, R0.xyzw, R0.xxxx, R2;\n"               // R1 = { zx2 + cx, ?, -zx*zy, zx*zy + 0.5*cy }
    "MAD R3.xy, R0.wwww, -R0.wwww, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "MAD R3.w, -R0.wwww, -R0.xxxx, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "\n"
    "MOV R3.z, -R3.w;\n"                            // R0 = { zx, ?, -zy, zy }
    "MOV R0.z, -R3.w;\n"                            // R0 = { zx, ?, -zy, zy }
    "MAD R1, R3.xyzw, R3.xxxx, R2;\n"               // R1 = { zx2 + cx, ?, -zx*zy, zx*zy + 0.5*cy }
    "MAD R0.xy, R3.wwww, -R3.wwww, R1.xyzw;\n"      // R1 = { zx2 - zy2 + cx, ?, ?, zx*zy + zx*zy + 0.5*cy }
    "\n"
    "MAD outcolor.xyz, R0, R1, R2;\n"
    "END\n"
};

// Buffer into which initializeGPU_FP() assembles the program text with
// sprintf(). The writes are not bounds-checked.
unsigned char fragProgram_[640*1024];   // 640 KB will be enough for everyone ... :)

// Constructs the renderer: forwards all arguments to the FragmentProgram
// base class, clears the R3xx flag, and stores the result of initialize()
// in isValid_.
FragmentProgramARB10::FragmentProgramARB10(int iters, int w, int h, double ax, double ay, double ex, double ey)
    : FragmentProgram(iters, w, h, ax, ay, ex, ey)
{
    usingR3xx_ = false;

    // This code will work on PowerPC and Intel Macs
    // NOTE: The graphics are incorrectly colored on PowerPC Macs -- is this an endian issue?
//#ifdef __APPLE__
//  // TODO: Why the hell NSGLGetProcAddress() this return a function pointer of functions which crashes ?
//  // FragmentProgramARB10 is currently disabled on the Mac.
//  printf("If you can tell me why OpenGL extension functions are crashing on the Mac, you'll get your FragmentProgram.\n");
//  isValid_ = false;
//  return;
//#endif

    isValid_ = initialize(iters, w, h, ax, ay, ex, ey);
}

// Releases GL state, but only if initialization succeeded (isValid_):
// disables fragment programs and 2D texturing and deletes the program
// object fpid_. Compiled to an empty body on IRIX (sgi).
FragmentProgramARB10::~FragmentProgramARB10(void)
{
#if !defined(sgi)
    if (isValid_) {
        glDisable(GL_FRAGMENT_PROGRAM_ARB);
        glDisable(GL_TEXTURE_2D);
        glDeleteProgramsARB(1, &fpid_);
    }
#endif
}

// Thin wrapper: forwards every argument to initializeGPU_FP() and returns
// its result.
bool FragmentProgramARB10::initialize(int iters, int w, int h, double ax, double ay, double ex, double ey)
{
    return initializeGPU_FP(iters, w, h, ax, ay, ex, ey);
}

// Sets up fragment-program rendering.
//
// Steps visible in this copy of the file:
//   1. store `iters` in iters_ and initialise the extension entry points;
//   2. return false if GL_ARB_fragment_program is not reported;
//   3. print two implementation limits (or "UNKNOWN" if the query raised a
//      GL error);
//   4. switch to the R300 program pieces when GL_VENDOR contains "ATI";
//   5. start assembling the program text into fragProgram_;
//   6. call prepareWorldSpace() and enable GL_FRAGMENT_PROGRAM_ARB.
//
// iters           requested iteration count (raised to 4 locally on the
//                 R3xx path).
// w, h            forwarded to prepareWorldSpace().
// ax, ay, ex, ey  forwarded to prepareWorldSpace().
//
// Returns true once the visible setup steps have run, false when the
// required extension is missing. Always returns false on IRIX (sgi).
bool FragmentProgramARB10::initializeGPU_FP(int iters, int w, int h, double ax, double ay, double ex, double ey)
{
#if !defined(sgi)
    // Initialize Fragment Program
    // NOTE(review): iters_ is stored before the R3xx minimum-of-4 adjustment
    // below, so the member keeps the caller's value -- confirm that is
    // intended.
    iters_ = iters;
    InitExtensionsARB();
    if (!checkRequiredExtensions()) {
        printf("ARB Fragment Program: Required extensions are not supported.\n");
        return false;
    }

    // Check GPU limits
    printf(" Maximum number of FP ALU instructions: ");
    glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_ALU_INSTRUCTIONS_ARB, &maxInstr_);
    if (glGetError() == GL_NO_ERROR)
        printf("%d\n", maxInstr_);
    else
        printf("UNKNOWN\n");

    GLint maxLocalConsts;
    printf(" Maximum number of FP native params: ");
    glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_PARAMETERS_ARB, &maxLocalConsts);
    if (glGetError() == GL_NO_ERROR)
        printf("%d\n", maxLocalConsts);
    else
        printf("UNKNOWN\n");

    // Check if GL impl is ATI's
    // NOTE(review): the glGetString() result is passed straight to strstr();
    // if it can be null here (e.g. no current GL context) this dereferences
    // a null pointer -- confirm a context is always current at this point.
    if (strstr((char*)glGetString(GL_VENDOR), "ATI")) {
        // Running on an ATI R3xx or better.
        // Enable R3xx optimizations.
        printf(" Running on an ATI R3xx or better.\n");
        printf(" Enabling optimized scheduling (iters count must be even).\n");
        usingR3xx_ = true;
        fragProgram0_ = fragProgram0R300_;
        fragProgram1_ = fragProgram1R300_;
        fragProgram2_ = fragProgram2R300_;
        if (iters < 4) {
            iters = 4;
            printf(" WARNING: R3xx code requires min 4 iters.\n");
        }
    }

    //GLint maxLocals;
    //glGetIntegerv(GL_MAX_OPTIMIZED_VERTEX_SHADER_LOCALS_EXT , &maxLocals);
    //cout << " Maximum number of FP locals: " << maxLocals << endl;

    // Generate the fragment program text for the required number of iters
    // (the original comment said "Vertex Program"). The opening piece is
    // written first; vpPrt always points at the end of the text so far.
    char* vpPrt = (char*) fragProgram_;
    int len = sprintf(vpPrt, "%s", fragProgram0_);
    vpPrt += len;

    //iters = 22;
    // Number of copies of the repeated piece. On the R3xx path four
    // iterations are discounted and the rest is halved; the integer division
    // drops the remainder for odd counts.
    int repliCount = iters;
    if (usingR3xx_)
        repliCount = (iters-4)/2;

    // NOTE(review): the statement below is incomplete in this copy of the
    // file: the loop condition, the loop body and everything that followed
    // it up to the prepareWorldSpace() call are missing. Restore this span
    // from the original source before building. Also note the loop index is
    // unsigned while repliCount is a signed int.
    for (unsigned int i=0; iprepareWorldSpace(w, h, ax, ay, ex, ey);

    // Create texture
    //createTexture();

    // Enable fragment programs (the original comment said "Vertex Programs")
    glEnable(GL_FRAGMENT_PROGRAM_ARB);

    return true;
#else
    return false;
#endif
}

// Resets the projection matrix and installs a 2D orthographic projection
// for the requested region, then enables fragment programs.
//
// w, h    divisors/multipliers for the step size and the vertical extent.
// ax, ex  left and right bounds passed to setOrtho2D().
// ay      passed as the `top` argument; the `bottom` argument is
//         ay + sy*h, where sy equals the horizontal step (ex - ax) / w.
// ey      not used by the live code (only by a commented-out line).
//
// Compiled to an empty body on IRIX (sgi).
void FragmentProgramARB10::prepareWorldSpace(int w, int h, double ax, double ay, double ex, double ey)
{
#if !defined(sgi)
    // Recalc real plane parameters
    double sx = (ex - ax) / ((double) w);
    //sy_ = (ey_ - ay_) / ((double) h_);
    double sy = sx;     // vertical step is forced equal to the horizontal one

    //glMatrixMode(GL_MODELVIEW);
    //glOrtho(0, 1, 1, 0, -1, 1);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    //glEnable(GL_TEXTURE_2D);
    glEnable(GL_FRAGMENT_PROGRAM_ARB);
    this->setOrtho2D(ax, ex, ay+sy*(double)h, ay);
#endif
}

// Applies gluOrtho2D() with the given bounds to the current matrix.
// Always returns true.
// NOTE(review): the `;` after the closing brace is a stray empty
// declaration.
bool FragmentProgramARB10::setOrtho2D(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top)
{
    gluOrtho2D(left, right, bottom, top);
    //glTrackMatrixNV(GL_VERTEX_PROGRAM_NV, 4, GL_PROJECTION, GL_IDENTITY_NV);
    return true;
};

// Returns true when CheckExtension() reports "GL_ARB_fragment_program",
// false otherwise.
bool FragmentProgramARB10::checkRequiredExtensions(void)
{
    if (!CheckExtension("GL_ARB_fragment_program"))
        return false;
    return true;
}

// Creates a texture object in texid_, binds it as the current 2D texture,
// selects nearest-neighbour min/mag filtering and uploads the 4x4 RGB test
// image textureImage_. Always returns true; GL errors are not checked.
// The only call visible in this file is commented out.
bool FragmentProgramARB10::createTexture(void)
{
    glGenTextures(1, &texid_);
    glBindTexture(GL_TEXTURE_2D, texid_);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glTexImage2D(GL_TEXTURE_2D, 0, 3, /*w*/4, /*h*/4, 0, GL_RGB, GL_UNSIGNED_BYTE, textureImage_);
    return true;
}