smallpt: Global Illumination in 99 lines of C++

smallpt is a global illumination renderer. It is 99 lines of C++, is open source, and renders the above scene using unbiased Monte Carlo path tracing (click for full size).

Features

Global illumination via unbiased Monte Carlo path tracing

99 lines of 72-column (or less) open source C++ code

Multi-threading using OpenMP

Soft shadows from diffuse luminaire

Specular, Diffuse, and Glass BRDFs

Antialiasing via super-sampling with importance-sampled tent distribution, and 2x2 subpixels

Ray-sphere intersection

Modified Cornell box scene description

Cosine importance sampling of the hemisphere for diffuse reflection

Russian roulette for path termination

Russian roulette and splitting for selecting reflection and/or refraction for glass BRDF

With minor changes compiles to a 4 KB binary (less than 4096 bytes) Added 11/11/2010:

Modifications including explicit light sampling and non-branching ray tree.

Ports to CUDA and BSGP featuring interactive display and scene editing. Added 3/12/2012:

Presentation slides explaining each line, by David Cline

#include <math.h> #include <stdlib.h> #include <stdio.h> struct Vec { double x , y , z ; Vec ( double x_ = 0 , double y_ = 0 , double z_ = 0 ) { x = x_ ; y = y_ ; z = z_ ; } Vec operator +( const Vec & b ) const { return Vec ( x + b . x , y + b . y , z + b . z ); } Vec operator -( const Vec & b ) const { return Vec ( x - b . x , y - b . y , z - b . z ); } Vec operator *( double b ) const { return Vec ( x * b , y * b , z * b ); } Vec mult ( const Vec & b ) const { return Vec ( x * b . x , y * b . y , z * b . z ); } Vec & norm () { return * this = * this * ( 1 / sqrt ( x * x + y * y + z * z )); } double dot ( const Vec & b ) const { return x * b . x + y * b . y + z * b . z ; } Vec operator %( Vec & b ) { return Vec ( y * b . z - z * b . y , z * b . x - x * b . z , x * b . y - y * b . x ); } } ; struct Ray { Vec o , d ; Ray ( Vec o_ , Vec d_ ) : o ( o_ ), d ( d_ ) {} } ; enum Refl_t { DIFF , SPEC , REFR } ; struct Sphere { double rad ; Vec p , e , c ; Refl_t refl ; Sphere ( double rad_ , Vec p_ , Vec e_ , Vec c_ , Refl_t refl_ ): rad ( rad_ ), p ( p_ ), e ( e_ ), c ( c_ ), refl ( refl_ ) {} double intersect ( const Ray & r ) const { Vec op = p - r . o ; double t , eps = 1e-4 , b = op . dot ( r . d ), det = b * b - op . dot ( op )+ rad * rad ; if ( det < 0 ) return 0 ; else det = sqrt ( det ); return ( t = b - det )> eps ? t : (( t = b + det )> eps ? t : 0 ); } } ; Sphere spheres [] = { Sphere ( 1e5 , Vec ( 1e5 + 1 , 40.8 , 81.6 ), Vec (), Vec ( .75 , .25 , .25 ), DIFF ), Sphere ( 1e5 , Vec (- 1e5 + 99 , 40.8 , 81.6 ), Vec (), Vec ( .25 , .25 , .75 ), DIFF ), Sphere ( 1e5 , Vec ( 50 , 40.8 , 1e5 ), Vec (), Vec ( .75 , .75 , .75 ), DIFF ), Sphere ( 1e5 , Vec ( 50 , 40.8 ,- 1e5 + 170 ), Vec (), Vec (), DIFF ), Sphere ( 1e5 , Vec ( 50 , 1e5 , 81.6 ), Vec (), Vec ( .75 , .75 , .75 ), DIFF ), Sphere ( 1e5 , Vec ( 50 ,- 1e5 + 81.6 , 81.6 ), Vec (), Vec ( .75 , .75 , .75 ), DIFF ), Sphere ( 16.5 , Vec ( 27 , 16.5 , 47 ), Vec (), Vec ( 1 , 1 , 1 )* .999 , SPEC ), Sphere ( 16.5 , Vec ( 73 , 16.5 , 78 ), Vec (), Vec ( 1 , 1 , 1 )* .999 , REFR ), Sphere ( 600 , Vec ( 50 , 681.6 - .27 , 81.6 ), Vec ( 12 , 12 , 12 ), Vec (), DIFF ) } ; inline double clamp ( double x ) { return x < 0 ? 0 : x > 1 ? 1 : x ; } inline int toInt ( double x ) { return int ( pow ( clamp ( x ), 1 / 2.2 )* 255 + .5 ); } inline bool intersect ( const Ray & r , double & t , int & id ) { double n = sizeof ( spheres )/ sizeof ( Sphere ), d , inf = t = 1e20 ; for ( int i = int ( n ); i --;) if (( d = spheres [ i ]. intersect ( r ))&& d < t ) { t = d ; id = i ; } return t < inf ; } Vec radiance ( const Ray & r , int depth , unsigned short * Xi ) { double t ; int id = 0 ; if (! intersect ( r , t , id )) return Vec (); const Sphere & obj = spheres [ id ]; Vec x = r . o + r . d * t , n =( x - obj . p ). norm (), nl = n . dot ( r . d )< 0 ? n : n *- 1 , f = obj . c ; double p = f . x > f . y && f . x > f . z ? f . x : f . y > f . z ? f . y : f . z ; if (++ depth > 5 ) if ( erand48 ( Xi )< p ) f = f *( 1 / p ); else return obj . e ; if ( obj . refl == DIFF ) { double r1 = 2 * M_PI * erand48 ( Xi ), r2 = erand48 ( Xi ), r2s = sqrt ( r2 ); Vec w = nl , u =(( fabs ( w . x )> .1 ? Vec ( 0 , 1 ): Vec ( 1 ))% w ). norm (), v = w % u ; Vec d = ( u * cos ( r1 )* r2s + v * sin ( r1 )* r2s + w * sqrt ( 1 - r2 )). norm (); return obj . e + f . mult ( radiance ( Ray ( x , d ), depth , Xi )); } else if ( obj . refl == SPEC ) return obj . e + f . mult ( radiance ( Ray ( x , r . d - n * 2 * n . dot ( r . d )), depth , Xi )); Ray reflRay ( x , r . d - n * 2 * n . dot ( r . d )); bool into = n . dot ( nl )> 0 ; double nc = 1 , nt = 1.5 , nnt = into ? nc / nt : nt / nc , ddn = r . d . dot ( nl ), cos2t ; if (( cos2t = 1 - nnt * nnt *( 1 - ddn * ddn ))< 0 ) return obj . e + f . mult ( radiance ( reflRay , depth , Xi )); Vec tdir = ( r . d * nnt - n *(( into ? 1 :- 1 )*( ddn * nnt + sqrt ( cos2t )))). norm (); double a = nt - nc , b = nt + nc , R0 = a * a /( b * b ), c = 1 -( into ?- ddn : tdir . dot ( n )); double Re = R0 +( 1 - R0 )* c * c * c * c * c , Tr = 1 - Re , P = .25 + .5 * Re , RP = Re / P , TP = Tr /( 1 - P ); return obj . e + f . mult ( depth > 2 ? ( erand48 ( Xi )< P ? radiance ( reflRay , depth , Xi )* RP : radiance ( Ray ( x , tdir ), depth , Xi )* TP ) : radiance ( reflRay , depth , Xi )* Re + radiance ( Ray ( x , tdir ), depth , Xi )* Tr ); } int main ( int argc , char * argv []) { int w = 1024 , h = 768 , samps = argc == 2 ? atoi ( argv [ 1 ])/ 4 : 1 ; Ray cam ( Vec ( 50 , 52 , 295.6 ), Vec ( 0 ,- 0.042612 ,- 1 ). norm ()); Vec cx = Vec ( w * .5135 / h ), cy =( cx % cam . d ). norm ()* .5135 , r , * c = new Vec [ w * h ]; #pragma omp parallel for schedule(dynamic, 1) private(r) // OpenMP for ( int y = 0 ; y < h ; y ++) { fprintf ( stderr , " \r Rendering (%d spp) %5.2f%%" , samps * 4 , 100 .* y /( h - 1 )); for ( unsigned short x = 0 , Xi [ 3 ]= { 0 , 0 , y * y * y } ; x < w ; x ++) for ( int sy = 0 , i =( h - y - 1 )* w + x ; sy < 2 ; sy ++) for ( int sx = 0 ; sx < 2 ; sx ++, r = Vec ()) { for ( int s = 0 ; s < samps ; s ++) { double r1 = 2 * erand48 ( Xi ), dx = r1 < 1 ? sqrt ( r1 )- 1 : 1 - sqrt ( 2 - r1 ); double r2 = 2 * erand48 ( Xi ), dy = r2 < 1 ? sqrt ( r2 )- 1 : 1 - sqrt ( 2 - r2 ); Vec d = cx *( ( ( sx + .5 + dx )/ 2 + x )/ w - .5 ) + cy *( ( ( sy + .5 + dy )/ 2 + y )/ h - .5 ) + cam . d ; r = r + radiance ( Ray ( cam . o + d * 140 , d . norm ()), 0 , Xi )*( 1 ./ samps ); } c [ i ] = c [ i ] + Vec ( clamp ( r . x ), clamp ( r . y ), clamp ( r . z ))* .25 ; } } FILE * f = fopen ( "image.ppm" , "w" ); fprintf ( f , "P3

%d %d

%d

" , w , h , 255 ); for ( int i = 0 ; i < w * h ; i ++) fprintf ( f , "%d %d %d " , toInt ( c [ i ]. x ), toInt ( c [ i ]. y ), toInt ( c [ i ]. z )); }

Other formats

smallpt.cpp (plain text)

smallpt4k.cpp (4 KB executable version)

smallpt.tar.gz (982 KB) Full distribution. Includes the above source files, Makefile, README, LICENSE, and resulting rendered image (losslessly converted to PNG).

Usage

g++ -O3 -fopenmp smallpt.cpp -o smallpt time ./smallpt 5000 display image.ppm

The argument to smallpt ("5000") is the number of samples per pixel, and must be greater than 4. If you don't have ImageMagick's display command available, you can use other viewers such as gthumb, gwenview, xv, gimp, etc.

GCC 4.2 or newer is required for multi-threading, however older versions of gcc will still work without threading. Remove the "-fopenmp" flag to disable threading support.

smallpt compiles with GCC 4.2 down to a 16 KB executable and produces a 1024x768 resolution image. Rendering using 5000 paths per pixel takes 2.1 hours on an Intel Core 2 Quad machine.

A slightly modified version is provided which compiles to a 4 KB (4049 bytes) compressed executable. Do "make smallpt4k" to build. See smallpt4k.cpp and the Makefile in the full distribution for more information. Note sstrip and p7zip are required.

If self assembly is used, the binary is only 2.7 KB (2666 bytes). Do "make smallptSA" to build.

Details



8 spp

13 sec 40 spp

63 sec 200 spp

5 min 1000 spp

25 min 5000 spp

124 min 25000 spp

10.3 hrs Timings and resulting images for different numbers of samples per pixel (spp) on a 2.4 GHz Intel Core 2 Quad CPU using 4 threads.

The box scene is constructed out of nine very large spheres which overlap. The camera is very close to their surface so they appear flat, yet each wall is actually the side of a sphere. The light is another sphere poking through the ceiling which is why it's round instead of the normal square. The black area visible in the mirror ball is the front of the box colored black to appear like empty space.

The image is computed by solving the rendering equation using numerical integration. The specific algorithm is Monte Carlo path tracing with Russian roulette for path termination. I highly recommend Shirley's and Jensen's execellent books (linked below) for explanations of these ideas. Due to size constraints and simplicity, explicit light sampling is not used, nor any ray intersection acceleration data structure.

Parallelism is achieved using an OpenMP #pragma to dynamically allocate rows of the image to different threads, with a thread for each processor or core. The variable Xi is used to store the state of the random number generator erand48() , and is seeded using an arbitrary function of the row number to decorrelate (at least visually) the sequences from row-to-row. In this way the sequences are deterministic and consistent from run to run, and independent of which thread is executing and in what order the rows are executed.

Anti-aliasing is done using supersampling, which removes all the jaggies except around the light. These are handled by using 2x2 subpixels which are clamped and then averaged.

Instead of fclose()ing the file, I exploit the C++ standard which calls return(0) implicitly, which in turn calls exit(0), which flushes and closes open files.

More Scenes





explicit.cpp Huge speedup, especially for small lights. Adds explicit light sampling with 23 additional lines of code and a small function signature change. Produces this image in 10 seconds on a Intel Core i7 920 quad-core CPU using 16 samples per pixel.

speedup, especially for small lights. Adds explicit light sampling with 23 additional lines of code and a small function signature change. Produces this image in 10 seconds on a Intel Core i7 920 quad-core CPU using 16 samples per pixel. forward.cpp Revision of radiance() function that removes all recursion and uses only a simple loop and no path branching. That is, the ray tree is always one ray wide.

function that removes all recursion and uses only a simple loop and no path branching. That is, the ray tree is always one ray wide. Single precision float support by Christopher Kulla. Mostly fixes crazy artifacts that appear when single precision floats are used, by avoiding self intersection. There is still a light leak at at the top of the scene though.

Last update: 11/5/2014

Initial version: 4/29/2007

Comments

Please enable JavaScript to view the comments powered by Disqus.

Disqus