psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 01, 2013, 10:08:44 AM

Last edit: July 30, 2013, 12:59:20 PM by psychocoder #1



Summary of information from this thread:



Repository with my changes for rpcminer-mod (CUDA only): https://github.com/psychocoderHPC/rpcminer-mod

Repository with all changes and Windows support (CUDA and OpenCL) (administered by charliemaggot): https://github.com/cdmackie/rpcminer-mod



Known CUDA errors under Windows:

- cudart32_50.dll or cudart32_50.dll is missing -> install the CUDA toolkit from https://developer.nvidia.com/cuda-downloads to solve this problem

- bitcoinminercuda.cpp:174 crash -> this means that the kernel runs longer than Windows allows; to solve this, add the parameter -gpugrid 256 (or another number) to the parameters



Original Post:



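I have changed the code of rpcminer-mod https://github.com/Ang3lus/rpcminer-mod a little bit so that we get better performance on Kepler GPUs.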

First, since CUDA 5.0 there is a rotate instruction in PTX (the parallel assembly language); we do not need to add it by hand because the compiler finds it automatically.



Example: (((x ) >> bits) | (x << (32 - bits))) is compiled to ptx command shf.l.wrap.b32



Note: The changes are not compatible with the OpenCL version; I only changed the CUDA source. CMakeLists.txt is hard-coded to generate sm_35 (Kepler) code.



Run new code with this parameter: -gpu=0 -aggression=8 -gpugrid=2048 -gputhreads=256



To apply the patch to your code, go to the root of the project and run $ patch -p1 < patch.txt



Now you get over 300MHash/s out of a Kepler GPU. I think that GTX Kepler GPUs are faster than the K20 HPC version. You may need to use sm_30 for GTX Kepler GPUs.



[EDIT:] for 330MHash/s the GPU needs 138 Watt power.



psychocoder



patch.txt

Code: diff -Naur ./cmake-rpcminer/CMakeLists.txt ../../rpcminer-cuda_svn//cmake-rpcminer/CMakeLists.txt

--- ./cmake-rpcminer/CMakeLists.txt 2013-01-28 19:27:46.000000000 +0100

+++ ../../rpcminer-cuda_svn//cmake-rpcminer/CMakeLists.txt 2013-04-01 11:22:19.000000000 +0200

@@ -32,6 +32,10 @@



IF(BITCOIN_ENABLE_CUDA)

ADD_DEFINITIONS(-D_BITCOIN_MINER_CUDA_)

+ #SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_35,code=sm_35)

+ SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode=arch=compute_35,code=sm_35 -Xptxas=-v -Xopencc=-LIST:source=on)

+ #SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode=arch=compute_13,code=sm_13 -Xptxas=-v -Xopencc=-LIST:source=on)

+

CUDA_ADD_EXECUTABLE(rpcminer ${BITCOIN_RPC_MINER_SRC} ${BITCOIN_RPC_MINER_CUDA_SRC})

# Install generated PTX CUDA module

INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/${generated_file_basename}.ptx" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/rpcminer-cuda" RENAME "bitcoinminercuda.ptx")

diff -Naur ./patch.txt ../../rpcminer-cuda_svn//patch.txt

--- ./patch.txt 2013-04-01 11:30:47.000000000 +0200

+++ ../../rpcminer-cuda_svn//patch.txt 1970-01-01 01:00:00.000000000 +0100

@@ -1,14 +0,0 @@

-diff -Naur ./cmake-rpcminer/CMakeLists.txt ../../rpcminer-cuda_svn//cmake-rpcminer/CMakeLists.txt

---- ./cmake-rpcminer/CMakeLists.txt 2013-01-28 19:27:46.000000000 +0100

-+++ ../../rpcminer-cuda_svn//cmake-rpcminer/CMakeLists.txt 2013-04-01 11:22:19.000000000 +0200

-@@ -32,6 +32,10 @@

-

- IF(BITCOIN_ENABLE_CUDA)

- ADD_DEFINITIONS(-D_BITCOIN_MINER_CUDA_)

-+ #SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_35,code=sm_35)

-+ SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode=arch=compute_35,code=sm_35 -Xptxas=-v -Xopencc=-LIST:source=on)

-+ #SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode=arch=compute_13,code=sm_13 -Xptxas=-v -Xopencc=-LIST:source=on)

-+

- CUDA_ADD_EXECUTABLE(rpcminer ${BITCOIN_RPC_MINER_SRC} ${BITCOIN_RPC_MINER_CUDA_SRC})

- # Install generated PTX CUDA module

- INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/${generated_file_basename}.ptx" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/rpcminer-cuda" RENAME "bitcoinminercuda.ptx")

diff -Naur ./src/cuda/bitcoinminercuda.cpp ../../rpcminer-cuda_svn//src/cuda/bitcoinminercuda.cpp

--- ./src/cuda/bitcoinminercuda.cpp 2013-01-28 19:27:46.000000000 +0100

+++ ../../rpcminer-cuda_svn//src/cuda/bitcoinminercuda.cpp 2013-04-01 11:22:30.000000000 +0200

@@ -290,6 +290,8 @@

{

AllocateResources(m_numb,m_numt);

}

+ m_out[0].m_bestnonce=0;

+ cuMemcpyHtoD(m_devout,m_out,/*m_numb*m_numt*/sizeof(cuda_out));



cuMemcpyHtoD(m_devin,m_in,sizeof(cuda_in));



@@ -319,11 +321,11 @@

cuFuncSetBlockShape(m_function,m_numt,1,1);

cuLaunchGrid(m_function,m_numb,1);



- cuMemcpyDtoH(m_out,m_devout,m_numb*m_numt*sizeof(cuda_out));

+ cuMemcpyDtoH(m_out,m_devout,/*m_numb*m_numt*/sizeof(cuda_out));



// very unlikely that we will find more than 1 hash with H=0

// so we'll just return the first one and not even worry about G

- for(int i=0; i<m_numb*m_numt; i++)

+ for(int i=0; i<1/*m_numb*m_numt*/; i++)

{

if(m_out[i].m_bestnonce!=0)// && m_out[i].m_bestg<bestg)

{

diff -Naur ./src/cuda/bitcoinminercuda.cu ../../rpcminer-cuda_svn//src/cuda/bitcoinminercuda.cu

--- ./src/cuda/bitcoinminercuda.cu 2013-01-28 19:27:46.000000000 +0100

+++ ../../rpcminer-cuda_svn//src/cuda/bitcoinminercuda.cu 2013-04-01 11:22:30.000000000 +0200

@@ -18,20 +18,31 @@



#include "cudashared.h"



-#define byteswap(x) (((x>>24) & 0x000000ff) | ((x>>8) & 0x0000ff00) | ((x<<8) & 0x00ff0000) | ((x<<24) & 0xff000000))

-#define rotateright(x,bits) (((x & 0xffffffff) >> bits) | (x << (32 - bits)))

-#define R(x) (work[x] = (rotateright(work[x-2],17)^rotateright(work[x-2],19)^((work[x-2]&0xffffffff)>>10)) + work[x - 7] + (rotateright(work[x-15],7)^rotateright(work[x-15],18)^((work[x-15]&0xffffffff)>>3)) + work[x - 16])

+#define rotateright(x,bits) (((x ) >> bits) | (x << (32 - bits)))

+

+#define R(x) (work[x] = (rotateright(work[x-2],17)^rotateright(work[x-2],19)^((work[x-2])>>10)) + work[x - 7] + (rotateright(work[x-15],7)^rotateright(work[x-15],18)^((work[x-15])>>3)) + work[x - 16])

+

#define sharound(a,b,c,d,e,f,g,h,x,K) {t1=h+(rotateright(e,6)^rotateright(e,11)^rotateright(e,25))+(g^(e&(f^g)))+K+x; t2=(rotateright(a,2)^rotateright(a,13)^rotateright(a,22))+((a&b)|(c&(a|b))); d+=t1; h=t1+t2;}



-extern "C" __global__ void cuda_process(cuda_in *in, cuda_out *out, const unsigned int loops, const unsigned int bits)

+extern "C" __global__ void cuda_process(cuda_in __restrict__ *in, cuda_out __restrict__ *out, const unsigned int loops, const unsigned int bits)

{

+ /*variable to check if any other block has a solution*/

+ __shared__ unsigned int canExit;

+ if(threadIdx.x==0)

+ canExit=out[0].m_bestnonce;

+ __syncthreads();

+ /*exit as fast as posible if one block has finished with solution*/

+ if(canExit!=0) return;

+



unsigned int work[64];

unsigned int A,B,C,D,E,F,G,H;

const unsigned int myid=(blockIdx.x*blockDim.x+threadIdx.x);

const unsigned int nonce=in->m_nonce + (myid << bits);

unsigned int t1,t2;

- unsigned int bestnonce=0;

+ //unsigned int bestnonce=0;

+

+

//unsigned int bestg=~0;



unsigned int* in_m_AH = in->m_AH;

@@ -40,46 +51,42 @@

unsigned int in_m_nbits = in->m_nbits;



// the first 3 rounds we can do outside the loop because they depend on work[0] through work[2] which won't change

- unsigned int A1,B1,C1,D1,E1,F1,G1,H1;

- A1=in_m_AH[0];

- B1=in_m_AH[1];

- C1=in_m_AH[2];

- D1=in_m_AH[3];

- E1=in_m_AH[4];

- F1=in_m_AH[5];

- G1=in_m_AH[6];

- H1=in_m_AH[7];

- sharound(A1,B1,C1,D1,E1,F1,G1,H1,in_m_merkle,0x428A2F98);

- sharound(H1,A1,B1,C1,D1,E1,F1,G1,in_m_ntime,0x71374491);

- sharound(G1,H1,A1,B1,C1,D1,E1,F1,in_m_nbits,0xB5C0FBCF);

-

- #pragma unroll 1

- for(unsigned int it=0; it<loops; it++)

+ /* move old A1, ... H1 to shared to solve registers

+ * can also calculated on host and give to kernel, because its se same for all threads and blocks

+ */

+ __shared__ unsigned int AH[8];

+ __shared__ unsigned int AH2[8]; //cache for second round

+ if(threadIdx.x<8)

+ {

+ AH2[threadIdx.x]=AH[threadIdx.x]=in_m_AH[threadIdx.x];

+ }

+ __syncthreads();

+ if(threadIdx.x==0)

+ {

+ sharound(AH[0],AH[1],AH[2],AH[3],AH[4],AH[5],AH[6],AH[7],in_m_merkle,0x428A2F98);

+ sharound(AH[7],AH[0],AH[1],AH[2],AH[3],AH[4],AH[5],AH[6],in_m_ntime,0x71374491);

+ sharound(AH[6],AH[7],AH[0],AH[1],AH[2],AH[3],AH[4],AH[5],in_m_nbits,0xB5C0FBCF);

+ }

+ __syncthreads();

+

+ #pragma unroll 1

+ for(unsigned int it=0; it<loops; ++it)

{

- /*

- A=in_m_AH[0];

- B=in_m_AH[1];

- C=in_m_AH[2];

- D=in_m_AH[3];

- E=in_m_AH[4];

- F=in_m_AH[5];

- G=in_m_AH[6];

- H=in_m_AH[7];

- */

- A=A1;

- B=B1;

- C=C1;

- D=D1;

- E=E1;

- F=F1;

- G=G1;

- H=H1;

+ if(out[0].m_bestnonce!=0) return;



+ A=AH[0];

+ B=AH[1];

+ C=AH[2];

+ D=AH[3];

+ E=AH[4];

+ F=AH[5];

+ G=AH[6];

+ H=AH[7];

work[0]=in_m_merkle;

work[1]=in_m_ntime;

work[2]=in_m_nbits;

//work[3]=byteswap(nonce+it);

- work[3]=nonce + it;

+ work[3]=nonce +it;

work[4]=0x80000000;

work[5]=0x00000000;

work[6]=0x00000000;

@@ -160,14 +167,14 @@



// hash the hash now



- work[0]=in_m_AH[0]+A;

- work[1]=in_m_AH[1]+B;

- work[2]=in_m_AH[2]+C;

- work[3]=in_m_AH[3]+D;

- work[4]=in_m_AH[4]+E;

- work[5]=in_m_AH[5]+F;

- work[6]=in_m_AH[6]+G;

- work[7]=in_m_AH[7]+H;

+ work[0]=AH2[0]+A;

+ work[1]=AH2[1]+B;

+ work[2]=AH2[2]+C;

+ work[3]=AH2[3]+D;

+ work[4]=AH2[4]+E;

+ work[5]=AH2[5]+F;

+ work[6]=AH2[6]+G;

+ work[7]=AH2[7]+H;

work[8]=0x80000000;

work[9]=0x00000000;

work[10]=0x00000000;

@@ -258,13 +265,15 @@



if((H==0))// && (G<=bestg))

{

- bestnonce=nonce+it;

+ //bestnonce=nonce+it;

+ atomicExch(&(out[0].m_bestnonce),nonce+it); /*we only need one solution*/

+

//bestg=G;

}



}



- out[myid].m_bestnonce=bestnonce;

+ //out[myid].m_bestnonce=bestnonce;

//out[myid].m_bestg=bestg;



}



Hi,Reposetory with my changes for rpcminer-mod (only Cuda): https://github.com/psychocoderHPC/rpcminer-mod Reposetory with all changes and windows support (CUDA und OpenCL) (administrated by charliemaggot): https://github.com/cdmackie/rpcminer-mod Known CUDA Errors unter Windows:- cudart32_50.dll or cudart32_50.dll is missing -> install https://developer.nvidia.com/cuda-downloads to solve this problem- bitcoinminercuda.cpp:174 crash -> this means that the kernel run longer than windows allow, to solv this add the paramter -gpugrid 256 or other number to the parametersI have changed the code of rpcminer-mod https://github.com/Ang3lus/rpcminer-mod a little bit thus we get better performance on Kepler GPUs.First, since cuda 5.0 we have a rotated function inside of the ptx (parallel asm), we must not add this by hand because the compiler find it automaticly.Example:is compiled to ptx commandNote: The changes are not comatible with the opencl version, I only change the cuda source. In CMAKEList.txt is hard coded that sm_35 (Kepler code) is created.Run new code with this parameter: -gpu=0 -aggression=8 -gpugrid=2048 -gputhreads=256To install the patch in your code goto root of the project and run $ patch -p1 < patch.txtNow you get over 300MHash/s out of a Kepler GPU, I think that GTX Kepler GPUs are faster than K20 HPC version. It can be that you must use sm_30 for GTX Kepler GPUs.[EDIT:] for 330MHash/s the GPU needs 138 Watt power.psychocoderpatch.txt

AWARD-WINNING

CASINO CRYPTO EXCLUSIVE

CLUBHOUSE 1500+

GAMES 2 MIN

CASH-OUTS 24/7

SUPPORT 100s OF

FREE SPINS PLAY NOW Advertised sites are not endorsed by the Bitcoin Forum. They may be unsafe, untrustworthy, or illegal in your jurisdiction. Advertise here.

wumpus



Offline



Activity: 812

Merit: 1000



No Maps for These Territories







Hero MemberActivity: 812Merit: 1000No Maps for These Territories Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 01, 2013, 10:33:15 AM

Last edit: April 01, 2013, 10:47:38 AM by John Smith #2 Nice find, so NVidia finally has a rotate instruction Warning: For most, coin loss is a larger risk than coin theft. A disk can die any time. Regularly back up your wallet through File → Backup Wallet to an external storage or the (encrypted!) cloud. Use a separate offline wallet for storing larger amounts. Bitcoin Core developer [PGP] For most, coin loss is a larger risk than coin theft. A disk can die any time. Regularly back up your wallet throughto an external storage or the (encrypted!) cloud. Use a separate offline wallet for storing larger amounts.

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 02, 2013, 05:41:03 AM #9 @relm9: No, I have no Windows version; I only program on Linux. I came out of my bitcoin hibernation to improve the NVIDIA GPU bitcoin mining process. I have no Windows PC with a K20 or Titan and therefore I can't test this with a Windows miner.



@philips: Thanks for the last link, I will look into whether I can get some more performance out of it.





psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 02, 2013, 02:22:44 PM #12 Not all performance came from the shift function. Most performance came from reducing registers per thread.



Before I started, one thread needed 114 32-bit registers (134 MHash/s).

After changing the code so that the shift operation is used, we needed 95 32-bit registers (~200 MHash/s).

And after adding shared memory we only need 46 registers. That means we can run 5 blocks with 256 threads each per streaming multiprocessor, and we get 330 MHash/s.



At the moment I am working on a version with over 400 MHash/s, but I have the problem that the mining pool does not count all my solutions.



If I have time I will see whether I can create a Windows version.



psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 03, 2013, 03:46:43 PM #21 Bad news!!!



With the K20 we can theoretically get only 372 MHash/s.



We can run 120 bit shifts and 160 xor/and/add/rotate operations per clock cycle and multiprocessor. The asm code inside the loop has 3725 and/or/... operations and 162 bit shifts. The K20 runs at 705 MHz and has 13 multiprocessors: 705*13/(3725/160+162/120)=372



With the GTX Titan we can get 475 MHash/s (theoretically) because the clock rate is 837 MHz and it has 14 multiprocessors.



All these calculations ignore the calculation overhead before the loop, and I am not really sure whether 160 or 120 rotates can be calculated per clock. At the moment I only get a real 330 MHash/s :-(

Keep in mind that this optimization does not give a factor of two on old GPUs, only on sm_35 (new Kepler GPUs).

cbuchner1



Offline



Activity: 756

Merit: 502







Hero MemberActivity: 756Merit: 502 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 03, 2013, 11:10:52 PM #23 Finally CUDA is getting a bit more attention from developers.



I am currently doing similar optimization work for the scrypt hashing used in Litecoin. About doubled the performance I am getting from most of my cards compared to OpenCL miners. This still sucks big time when compared to ATI cards, but it sucks a bit less than before.



With the scrypt hashing it appears much more difficult to lower the kernel's register count, as the required Salsa20/8 rounds are fairly complex beasts, also the memory-hard part of the algorithm really bangs on the memory controller.



Watch out for potential Windows binary releases in the next days. I will post into the alt cryptocurrency forum.



Evan



Offline



Activity: 507

Merit: 500









Hero MemberActivity: 507Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 04, 2013, 12:51:39 AM #24 Quote from: cbuchner1 on April 03, 2013, 11:10:52 PM Finally CUDA is getting a bit more attention from developers.



I am currently doing similar optimization work for the scrypt hashing used in Litecoin. About doubled the performance I am getting from most of my cards compared to OpenCL miners. This still sucks big time when compared to ATI cards, but it sucks a bit less than before.



With the scrypt hashing it appears much more difficult to lower the kernel's register count, as the required Salsa20/8 rounds are fairly complex beasts, also the memory-hard part of the algorithm really bangs on the memory controller.



Watch out for potential Windows binary releases in the next days. I will post into the alt cryptocurrency forum.





for ltc or btc? for ltc or btc?

1PtHcavXoakgNkQfEQdvnvEksEY2NvwaLM I am poor, but i do work for Coin1PtHcavXoakgNkQfEQdvnvEksEY2NvwaLM

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 04, 2013, 07:42:21 PM #33 Please wait and save your BTC; if I have time I will add this code to a Windows miner which supports CUDA and is still maintained. Keep in mind that the code is open source and you must give out the source for free.

Does anyone know a Windows CUDA miner? I have been out of bitcoin mining for 2 years.



@Evan GTX690 has only sm_30 and not support rotate function :-(

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 05, 2013, 04:09:59 AM

Last edit: April 05, 2013, 05:32:16 PM by charliemaggot #41 That's the Microsoft runtime, but it was linked to the debug version so I've changed it to a release version.



You can download again and I've added the missing file in case you don't have the runtime installed.



edit: CUDA module isn't compiled...working on it.



edit2: I've added the CUDA ptx file, so you can download the latest binaries.



For expediency, I've thrown in a "buildcuda.bat" in the root that will compile the ptx and put it in bin.



I'll tidy it all up tomorrow (or later today as they say).



gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 05, 2013, 06:37:50 PM #43 I have a GTX 680.. just tried to run it but it spit this out..



Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 CUDA GPU devices found

Setting CUDA device to first device found

Loading module bitcoinminercuda.ptx

Unable to load CUDA module: 209



then it exits..

gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 05, 2013, 10:23:12 PM

Last edit: April 05, 2013, 10:38:55 PM by gateway #51



default settings:



Done allocating CUDA resources for (32,128)

Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff

68767 khash/s

73804 khash/s

74725 khash/s

74484 khash/s

73926 khash/s

73477 khash/s

73711 khash/s



cgminer does about 118-120~



some settings I found just testing but no idea if they are right for the 680 card

Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 CUDA GPU devices found

Setting CUDA device to first device found

Loading module bitcoinminercuda.ptx

CUDA initialized

Done allocating CUDA resources for (256,512)

Finding best configuration step end (256,512) 1ms prev best=9223372036854775807ms

Done allocating CUDA resources for (256,512)

Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff

113145 khash/s

119789 khash/s

126394 khash/s

119730 khash/s

120627 khash/s

122473 khash/s

122970 khash/s



-aggression=8 -gpugrid=256 -gputhreads=512

Ok got this working, any recommended settings ?default settings:Done allocating CUDA resources for (32,128)Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff68767 khash/s73804 khash/s74725 khash/s74484 khash/s73926 khash/s73477 khash/s73711 khash/scgminer does about 118-120~some settings I foundjust testing but no idea if they are right for the 680 cardClient will start 1 miner threadsWork will be refreshed every 4000 ms1 CUDA GPU devices foundSetting CUDA device to first device foundLoading module bitcoinminercuda.ptxCUDA initializedDone allocating CUDA resources for (256,512)Finding best configuration step end (256,512) 1ms prev best=9223372036854775807msDone allocating CUDA resources for (256,512)Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff113145 khash/s119789 khash/s126394 khash/s119730 khash/s120627 khash/s122473 khash/s122970 khash/s-aggression=8 -gpugrid=256 -gputhreads=512

InqBit



Offline



Activity: 27

Merit: 0









NewbieActivity: 27Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 03:48:06 AM #57 I am getting a 200 error. I didn't see it covered above. I downloaded all the files in the bin folder into the same folder, is that correct?



Windows 7 x64





C:\miners\rpcminer nvopt>rpcminer-mod-cuda.exe -url=http://stratum.bitcoin.cz:3333 -user=user -password=pass

Client will start 1 miner threads

Work will be refreshed every 4000 ms

2 CUDA GPU devices found

Setting CUDA device to first device found

Loading module bitcoinminercuda.ptx

Unable to load CUDA module: 200

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 04:35:40 AM

Last edit: April 06, 2013, 05:04:57 AM by charliemaggot #58 Quote from: Evan on April 06, 2013, 03:20:41 AM

Nvcc : fatal error : Cannot find compiler 'cl.exe' in PATH





Means that it can't find the C compiler. You need to have the Microsoft C compiler installed (e.g. Visual Studio 10 or Visual Studio 10 Express) to be able to build the cuda files. If you do have it already, you just need to run "C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat" first in the command window to set up the paths.



If you don't have it, either download and install the Express version or just copy down the the prebuilt .ptx files from the bin directory in Means that it can't find the C compiler. You need to have the Microsoft C compiler installed (e.g. Visual Studio 10 or Visual Studio 10 Express) to be able to build the cuda files. If you do have it already, you just need to run "C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat" first in the command window to set up the paths.If you don't have it, either download and install the Express version or just copy down the the prebuilt .ptx files from the bin directory in https://github.com/cdmackie/rpcminer-mod/tree/kepler/bin . There are now different ones, so try the bitcoinminercuda.ptx first. If that works, rename bitcoinminercuda.20.ptx to bitcoinminercuda.ptx and try again. Likewise with bitcoinminercuda.30.ptx. USe the last one that worked.

GimpyPrime



Offline



Activity: 68

Merit: 10







MemberActivity: 68Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 07:04:04 AM #64 I've got it working now, with CUDA I get 100-110mhash/s on a 670GTX FTW(Overclocked), with an i7 CPU



Using OpenCL I get approximately the same range, but even as high as 120-130mhash/s sometimes.



Not expecting to become a bitcoin millionaire with a 670GTX lol, just doing this for fun. However whatever improvements got added do not appear to be assisting with my machine.



Curious though what is the difference between the bitcoinminercuda.20/30 files? I am wondering if there is something I need to compile myself here.



camaro69327



Offline



Activity: 59

Merit: 0









NewbieActivity: 59Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 03:10:04 PM #67 Might as well jump in ....First..no linuix ...just a point click, old guy in Win 7.



I have 2 - 580 GTX. Using CGminer I get 160-200 Mhash PER card . (depends on overclock and using Comp or not)



Trying this..@ first i had the "Unable to load CUDA module: 209" error. I grabbed the other .plx file bitcoinminercuda.20.ptx. Renamed it and...



Now i am getting "curl return Value = 7"



Kinda getting lost here...lol These are some of the Command lines tried....



rpcminer-mod-cuda.exe -aggression=8 -gpugrid=64 -gputhreads=384 -o - url=http://stratum.bitcoin.cz:3333 -user=####### -password=#####

rpcminer-mod-cuda.exe -aggression=8 -gpugrid=256 -gputhreads=512 - url=http://stratum.bitcoin.cz:3333 -user=###### -password=#####

rpcminer-mod-cuda.exe -url=http://stratum.bitcoin.cz:3333 -user=##### -password=####

rpcminer-mod-cuda.exe -url=http://localhost:8332 -user=##### -password=#### <<(set according to Bitcoin.conf)



"curl return Value = 7"



Thanks for all the hard work you guys do, would really like to get these cards working better (they are embarrassed to announce their terrible Hash rates to all the other cards on my network. Especially the 7970 getting 720 Mhash...lol).



gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 05:05:26 PM #69 Quote from: camaro69327 on April 06, 2013, 03:10:04 PM Might as well jump in ....First..no linuix ...just a point click, old guy in Win 7.



I have 2 - 580 GTX. Using CGminer I get 160-200 Mhash PER card . (depends on overclock and using Comp or not)



Trying this..@ first i had the "Unable to load CUDA module: 209" error. I grabbed the other .plx file bitcoinminercuda.20.ptx. Renamed it and...



Now i am getting "curl return Value = 7"



Kinda getting lost here...lol These are some of the Command lines tried....



rpcminer-mod-cuda.exe -aggression=8 -gpugrid=64 -gputhreads=384 -o - url=http://stratum.bitcoin.cz:3333 -user=####### -password=#####

rpcminer-mod-cuda.exe -aggression=8 -gpugrid=256 -gputhreads=512 - url=http://stratum.bitcoin.cz:3333 -user=###### -password=#####

rpcminer-mod-cuda.exe -url=http://stratum.bitcoin.cz:3333 -user=##### -password=####

rpcminer-mod-cuda.exe -url=http://localhost:8332 -user=##### -password=#### <<(set according to Bitcoin.conf)



"curl return Value = 7"



Thanks for all the hard work you guys do, would really like to get these cards working better (they are embarrassed to announce their terrible Hash rates to all the other cards on my network. Especially the 7970 getting 720 Mhash...lol).





Don't use the stratum URL, use this instead: btcguild.com:8332

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 06:16:11 PM #71 Please use a pool with getwork support; there is no real stratum support inside the miner.



Tonight (German time) I will post a new repository with my new code. I hope charliemaggot will create a Windows version.

The new code supports all old GPUs (I think back to the GTX9800). There is no ptx needed.



I think on old GPUs we can't get a good speedup because those GPUs have very slow bit operations.



GPU Overview:



C1070 - old GPU - 30 Streaming Multiprocessors (SM) - ~53MHash/s

C2050 - old Fermi - 14 SM - ~ 90MHash/s

K20c - new Kepler - 13 SM - ~ 325MHash/s

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 08:27:56 PM

Last edit: April 06, 2013, 08:39:06 PM by charliemaggot #72



You can obviously build them yourself, I was just trying to include as much as possible so it could just be downloaded and run, however I didn't include a note about which file was needed. If you have VS 2010 you can edit the buildcuda.bat file and change the compute number to be appropriate for your card - and change the compiler value if you are using VS2012.



@dentldir Should work with the 30 file on your 660. Latest stable driver? Maybe try again after I rebuild it from psychocoder's latest changes.



@camaro69327 Your card is a 2.0 Fermi device, so not sure you would see much benefit from the Kepler (3.0) optimisation in this thread. Wait and see if psychocoder can create some better optimisations. The app is still using the getwork api, so you need to use http://api.bitcoin.cz:8332 or download their stratum proxy.



@psychocoder I'll start building it for Windows once you get changes done, if you could let me know where they are. Thanks.

@GimpyPrime The default bitcoinminercuda.ptx file was built to "compute 3.5", which was just from the patch in psychocoder's first post in this thread. It is the latest optimised level for NVIDIA Telsa K20 cards. You need to know the compute level for your card ( https://developer.nvidia.com/cuda-gpus ) and use the appropriate file. Your GTX 670 should be compute level 3.0, so you would need the .30 file.You can obviously build them yourself, I was just trying to include as much as possible so it could just be downloaded and run, however I didn't include a note about which file was needed. If you have VS 2010 you can edit the buildcuda.bat file and change the compute number to be appropriate for your card - and change the compiler value if you are using VS2012.@dentldir Should work with the 30 file on your 660. Latest stable driver? Maybe try again after I rebuild it from psychocoder's latest changes.@camaro69327 Your card is a 2.0 Fermi device, so not sure you would see much benefit from the Kepler (3.0) optimisation in this thread. Wait and see if psychocoder can create some better optimisations. The app is still using the getwork api, so you need to use http://api.bitcoin.cz:8332 or download their stratum proxy.@psychocoder I'll start building it for Windows once you get changes done, if you could let me know where they are. Thanks.

camaro69327



Offline



Activity: 59

Merit: 0









NewbieActivity: 59Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 09:30:09 PM #73 Thanks for the responses guys. I did get this working as i had the wrong port for localhost. 9332 Worked. Only could get one card hashing. Could get 160 easy. 214 was another common Mhash.



300 + Mh but only for 3 or 4 shares then 0.



Back to Cgminer and a steady 170 - 200 for now. I thought this was the Gen i had. Fermi not Kelper Duh.











psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 06, 2013, 10:38:35 PM #74

Now I only need 32 registers on Fermi and have 100% occupancy. There is no need to create a ptx file because all versions are inside the binary after compiling.



Sorry that I created a new repo, but it was my first git commit. I am old-school and normally use svn ^^



@charliemaggot: Please add Windows compile support. I have checked in my code to https://github.com/psychocoder-germany/rpcminer-mod . The code is a little bit slower than my first patch. I have moved some calculations to the CPU to save registers. Now I only need 32 registers for Fermi and have 100% occupancy. There is no need to create ptx because all versions are inside the binary after compiling. Sorry that I created a new repo, but it was my first git commit. I am old-school and normally use svn^^ @charliemaggot: Please add Windows compile support.

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 12:42:33 AM #78



The GTX680 does not have the new bit rotate (funnel) operator, so I think 300+ is not possible.

Theoretical calculation: 1006*8/(3733/160+1194/32)=132 MHash/s (magic numbers are the count of operations from the binary for this implementation)



IMO the GTX680 GPU is limited to max 132 MHash/s. The K20c ( http://www.techpowerup.com/gpudb/564/NVIDIA_Tesla_K20c.html ) is a high performance GPU card. These cards are created to run math calculations with floating point operations. It is nothing for a home PC. The version for the home PC with the same architecture is the GTX Titan http://www.techpowerup.com/gpudb/1996/.html . The GTX680 does not have the new bit rotate (funnel) operator, so I think 300+ is not possible. Theoretical calculation: 1006*8/(3733/160+1194/32)=132 MHash/s (magic numbers are the count of operations from the binary for this implementation). IMO the GTX680 GPU is limited to max 132 MHash/s

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 05:58:23 AM

Last edit: April 07, 2013, 06:15:35 AM by charliemaggot #80



(use the master branch)



We'll merge them together shortly.



To just run, you only need the bin folder, and run the rpcminer-mod-cuda.exe. There is no need for the ptx files anymore.



To build yourself, you need MSVC 2010 and the CUDA SDK 5.x.



Please post any errors or successes. I have updated the Windows build with psychocoder's changes: https://github.com/cdmackie/rpcminer-mod (use the master branch)We'll merge them together shortly.To just run, you only need the bin folder, and run the rpcminer-mod-cuda.exe. There is no need for the ptx files anymore.To build yourself, you need MSVC 2010 and the CUDA SDK 5.x.Please post any errors or successes.

InqBit



Offline



Activity: 27

Merit: 0









NewbieActivity: 27Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 06:45:43 AM #81 Quote from: charliemaggot on April 07, 2013, 05:58:23 AM



(use the master branch)



We'll merge them together shortly.



To just run, you only need the bin folder, and run the rpcminer-mod-cuda.exe. There is no need for the ptx files anymore.



To build yourself, you need MSVC 2010 and the CUDA SDK 5.x.



Please post any errors or successes.

I have updated the Windows build with psychocoder's changes: https://github.com/cdmackie/rpcminer-mod (use the master branch)We'll merge them together shortly.To just run, you only need the bin folder, and run the rpcminer-mod-cuda.exe. There is no need for the ptx files anymore.To build yourself, you need MSVC 2010 and the CUDA SDK 5.x.Please post any errors or successes.

This version of C:\miners\rpcminer nvopt\rpcminer-mod-cuda.exe is not compatible with the version of Windows you're running. Ch

eck your computer's system information to see whether you need a x86 (32-bit) or

x64 (64-bit) version of the program, and then contact the software publisher.



Error message when starting from command line. Win 7 x64 This version of C:\miners\rpcminer nvopt\rpcminer-mod-cuda.exe is not compatible with the version of Windows you're running. Check your computer's system information to see whether you need a x86 (32-bit) orx64 (64-bit) version of the program, and then contact the software publisher.Error message when starting from command line. Win 7 x64

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 11:22:16 AM #84



I have tested the Windows version on Windows 7 64-bit.



I started the program and cudart32_50.dll or cudart64_50.dll was missing. I installed CUDA for Windows ( https://developer.nvidia.com/cuda-downloads ) and then everything ran.

It can be that this is a sideeffect from switching to runtime library of cuda.



If an error shows that bitcoinminercuda.cpp:174 crashed, set the parameter -gpugrid to 256 or 512; it comes from Windows' internal limit that no calculation on the GPU can run longer than 4 sec.



With GT555M I get:



22.3 - 22.6 MHash/s with OpenCL

22.3 - 22.9 MHash/s with CUDA



I put all links to repositories and errors in the top post. Hi, I have tested the Windows version on Windows 7 64-bit. I started the program and cudart32_50.dll or cudart64_50.dll was missing. I installed CUDA for Windows https://developer.nvidia.com/cuda-downloads and then everything ran. It can be that this is a side effect of switching to the runtime library of CUDA. If an error shows that bitcoinminercuda.cpp:174 crashed, set the parameter -gpugrid to 256 or 512; it comes from Windows' internal limit that no calculation on the GPU can run longer than 4 sec. With GT555M I get: 22.3 - 22.6 MHash/s with OpenCL, 22.3 - 22.9 MHash/s with CUDA. I put all links to repositories and errors in the top post.

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 05:16:42 PM #85



@psychocoder Seems there are some other ways around the time-out issue:



1) As you say, change grid values

2) Unplug the monitor from the card... assuming you aren't using it

3) Change registry settings to increase the time-out. See



Were the 22.3-22.9 values you got on your GT555M in Windows what you expected? Is it the same as Unix?

@InqBit Just seems like the file is corrupted. Does it match the one on github? Can you try downloading it again? You will need the 32bit CUDA SDK, or at least cudart32_50.dll if that isn't included in the 64bit version. I hadn't built 64bit, as there were 3rd party dependencies, but I might try and add them later. @psychocoder Seems there are some other ways around the time-out issue: 1) As you say, change grid values 2) Unplug the monitor from the card... assuming you aren't using it 3) Change registry settings to increase the time-out. See http://msdn.microsoft.com/en-us/library/windows/hardware/ff569918(v=vs.85).aspx . Perhaps only for the hardcore and if it's going to make a significant difference. Were the 22.3-22.9 values you got on your GT555M in Windows what you expected? Is it the same as Unix?

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 06:38:06 PM #86 Quote Were the 22.3-22.9 values you got on your GT555M in Windows what you expected? Is it the same as Unix? Can't test it under linux. Thats a Laptop with 2 GPUs it was not possible to run the miner under linux :-(



I hope we get some results from a user with a GTX Titan. Can't test it under linux. Thats a Laptop with 2 GPUs it was not possible to run the miner under linux :-(I hope we get some results from a user with a GTX Titan.

InqBit



Offline



Activity: 27

Merit: 0









NewbieActivity: 27Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 07:34:40 PM #87 Quote from: charliemaggot on April 07, 2013, 05:16:42 PM @InqBit Just seems like the file is corrupted. Does it match the one on gibhub? Can you try downloading it again? You will need to 32bit CUDA SDK, or at least cudart32_50.dll if that isn't included in the 64bit version. I hadn't built 64bit, as there were 3rd party dependencies, but I might try and them them later.





Downloaded & installed the x64 CUDA dev software & the .exe is just crashing now. I re-downloaded all the files this morning & was getting an error about the cudart32 file, & that's when I downloaded the CUDA package.



It begins to start, then crashes after outputting:



Client will start 1 miner threads

Work will be refreshed every 4000 ms

2 GPU GUDA device(s) found

<src\cuda\bitcoinminercuda.cpp>:54



Event log below:



Faulting application name: rpcminer-mod-cuda.exe, version: 0.0.0.0, time stamp: 0x5161065f

Faulting module name: rpcminer-mod-cuda.exe, version: 0.0.0.0, time stamp: 0x5161065f

Exception code: 0x40000015

Fault offset: 0x0002eadc

Faulting process id: 0x19ac

Faulting application start time: 0x01ce33c69eee3f83

Faulting application path: C:\miners\rpcminer nvopt\rpcminer-mod-cuda.exe

Faulting module path: C:\miners\rpcminer nvopt\rpcminer-mod-cuda.exe

Report Id: dd0fe4b3-9fb9-11e2-94d6-001fbc083cc7 Downloaded & installed the x64 CUDA dev software & the .exe is just crashing now. I re-downloaded all the files this morning & was getting an error about the cudart32 file, & that's when I downloaded the CUDA package.It begins to start, then crashes after outputting:Client will start 1 miner threadsWork will be refreshed every 4000 ms2 GPU GUDA device(s) found :54Event log below:Faulting application name: rpcminer-mod-cuda.exe, version: 0.0.0.0, time stamp: 0x5161065fFaulting module name: rpcminer-mod-cuda.exe, version: 0.0.0.0, time stamp: 0x5161065fException code: 0x40000015Fault offset: 0x0002eadcFaulting process id: 0x19acFaulting application start time: 0x01ce33c69eee3f83Faulting application path: C:\miners\rpcminer nvopt\rpcminer-mod-cuda.exeFaulting module path: C:\miners\rpcminer nvopt\rpcminer-mod-cuda.exeReport Id: dd0fe4b3-9fb9-11e2-94d6-001fbc083cc7

gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 08:21:05 PM #88



when running rpc a window pops up and says



the program cant start because cuda32_50_35.dll is missing from your computer..



seems something went whacko I can confirm im getting the same thing..when running rpc a window pops up and saysthe program cant start because cuda32_50_35.dll is missing from your computer..seems something went whacko

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 09:11:58 PM #89 Quote from: InqBit on April 07, 2013, 07:34:40 PM

Downloaded & installed the x64 CUDA dev software & the .exe is just crashing now. I re-downloaded all the files this morning & was getting an error about the cudart32 file, & that's when I downloaded the CUDA package.





Ok, whilst building a debug version to try and track your issue down, I found out that there is an error in the error handler. So based on the line you had the error, it's that the CUDA device driver didn't initialise properly. Do you have the right and latest drivers?



I've fixed the error handler and uploaded a new binary so you should see a more meaningful message.

Ok, whilst building a debug version to try and track your issue down, I found out that there is an error in the error handler. So based on the line you had the error, it's that the CUDA device driver didn't initialise properly. Do you have the right and latest drivers?I've fixed the error handler and uploaded a new binary so you should see a more meaningful message.

gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 09:43:05 PM

Last edit: April 07, 2013, 11:05:24 PM by gateway #91 Quote

Do you have 32-bit CUDA SDK 5.0 installed?



I get the same error, no I never installed cuda since I wasnt compiling this, I have the latest nvidia drivers tho since this is my uber gaming rig..



any chance to include the dll?



EDIT: .. installed 32bit version of cuda 5 from nvidia's site.. now when I run the miner the program crashes.. it's not missing the dll .. but



Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 GPU GUDA device(s) found

<src\cuda\bitcoinminercuda.cpp>:54[CUDA] Error: invalid device ordinal I get the same error, no I never installed cuda since I wasnt compiling this, I have the latest nvidia drivers tho since this is my uber gaming rig..any chance to include the dll?EDIT: .. installed 32bit version of cuda 5 from nvidias site.. now when I run the miner I the program crashs.. its not missing the dll .. butClient will start 1 miner threadsWork will be refreshed every 4000 ms1 GPU GUDA device(s) found :54[CUDA] Error: invalid device ordinal

InqBit



Offline



Activity: 27

Merit: 0









NewbieActivity: 27Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 07, 2013, 10:04:53 PM

Last edit: April 07, 2013, 10:27:24 PM by InqBit #92 Quote from: charliemaggot on April 07, 2013, 09:11:58 PM Quote from: InqBit on April 07, 2013, 07:34:40 PM

Downloaded & installed the x64 CUDA dev software & the .exe is just crashing now. I re-downloaded all the files this morning & was getting an error about the cudart32 file, & that's when I downloaded the CUDA package.





Ok, whilst building a debug version to try and track your issue down, I found out that there is an error in the error handler. So based on the line you had the error, it's that the CUDA device driver didn't initialise properly. Do you have the right and latest drivers?



I've fixed the error handler and uploaded a new binary so you should see a more meaningful message.



Ok, whilst building a debug version to try and track your issue down, I found out that there is an error in the error handler. So based on the line you had the error, it's that the CUDA device driver didn't initialise properly. Do you have the right and latest drivers?I've fixed the error handler and uploaded a new binary so you should see a more meaningful message.

I am 1 version behind on drivers, will download the latest ones.



Latest error with newest rpcminer-mod-cuda (1 version behind on drivers fyi)



Client will start 1 miner threads

Work will be refreshed every 4000 ms

2 GPU GUDA device(s) found

<src\cuda\bitcoinminercuda.cpp>:54[CUDA] Error: invalid device ordinal

Could not retrieve work from RPC server.

CURL return value = 7



edit: Latest drivers, 314.22, same error as above. I am 1 version behind on drivers, will download the latest ones.Latest error with newest rpcminer-mod-cuda (1 version behind on drivers fyi)Client will start 1 miner threadsWork will be refreshed every 4000 ms2 GPU GUDA device(s) found :54[CUDA] Error: invalid device ordinalCould not retrieve work from RPC server.CURL return value = 7edit: Latest drivers, 314.22, same error as above.

coastermonger



Offline



Activity: 367

Merit: 250



Find me at Bitrated







Sr. MemberActivity: 367Merit: 250Find me at Bitrated Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 02:23:29 AM #93 I'm trying out the windows version with a GTX 670 and it doesn't seem to be working



I can confirm that I've installed CUDA dev software 5.0

I also have updated drivers including cudart32_50_35.dll



When clicking on the rpcminer-mod-cuda.exe file the black cmd screen comes up and an error window immediately says "rpcminer-mod-cuda.exe has stopped working"



On the cmd window in the background it says



Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 GPU CUDA device(s) found

<src\cuda\bitcoinminercuda.cpp>:54[CUDA] Error: invalid device ordinal

Error:invalid device ordinal



When I run the debugger it tells me there is a Run-Time Check Failure #0 - The value of ESP was not properly saved across a function call. This is usually the result of calling a function declared with one calling convention with a function pointer declared with a different calling convention



To be honest I wasn't sure where exactly to extract the zip files, or how to make a file that alters how the executable runs... Any help will be greatly appreciated. Post your address and I'll tip you my first bitcent or so Bitrated user: Rees.

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 02:25:05 AM #94 I've added the cudart32_50_35.dll to the binaries so it can be downloaded, although you've probably got it by now.



I think there is a bug if you don't specify the gpu flag, e.g. using "-gpu" and not something like "-gpu=0". So can you try explicitly setting it 0, 1 or whatever.



If you still get an error, there should be a rpcminer.dmp file created in the same folder. Can you send it to me, charlie (at) legmail.com

InqBit



Offline



Activity: 27

Merit: 0









NewbieActivity: 27Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 07:00:32 AM #101 I did a full repull of the files from charlie's github and it is working now, using only 1 of my 2 gpu's. I did uninstall the CUDA dev prog as well after seeing you did, and pulled the correct file it needed from github.



As of now, not seeing any difference in hash rate on a Fermi card, but I assume that's what was expected.

InqBit



Offline



Activity: 27

Merit: 0









NewbieActivity: 27Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 07:15:40 AM #102 Here was my output fwiw. GTX 480



C:\miners\rpcminer nvopt>rpcminer-mod-cuda.exe -url=api.bitcoin.cz:8332 -user=user -password=password -gpugrid=512 -gputhreads=480 -gpu=0

Client will start 1 miner threads

Work will be refreshed every 4000 ms

2 GPU GUDA device(s) found

CUDA initialized

Search Configuration for gpu named: GeForce GTX 480

Your device:

- 15 streaming multiprocessors (SM)

- can run sm version 200

- bitcoin miner is optimized for 3 gpugrids per SM

- max value for gpugrid parameter is 65535

max nonce 3FFFFFFF

Done allocating CUDA resources for (512,256)

iterations on gpu=32 blocks=512 threads=256

Iterations: 255,hashs per iteration 15994212185991544832

Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff

Found nonce 364CFF6C

2013-04-08 07:11:01 Found Hash!

Hashs found: 1

Sending to server: {"method":"getwork","params":["000000029bddf49cc3f72f75bf4552

d589df0fb9ec95184c247082e00000009200000000ffb19deae961efc905812ff5b1fcf69ec34f2 7

c9d0ba4194749c538e3ceb2ca751626d7e1a022fbe6cff4c3600000080000000000000000000000 0

000000000000000000000000000000000000000000000000000000000080020000"],"id":1}

Server sent: {"result": true, "id": "1", "error": null}

Found nonce AE9CEA4

2013-04-08 07:11:03 Found Hash!

Hashs found: 2

Sending to server: {"method":"getwork","params":["000000029bddf49cc3f72f75bf4552

d589df0fb9ec95184c247082e000000092000000000c048fe68ab3317714aaee783c242a630feb0 1

86961ef159df4949318f8688e051626d851a022fbea4cee90a00000080000000000000000000000 0

000000000000000000000000000000000000000000000000000000000080020000"],"id":1}

Server sent: {"result": true, "id": "1", "error": null}

137694 khash/s

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 08:55:10 AM #104 @InqBit: is 137694 khash/s the first shown hash rate? Please run the miner for 5 min and then look at the hash rate. The first hash rate is always lower because the first kernel call takes very long.

I see you ran the code without -aggression; please add -aggression=8 or 7 to your parameters to get more performance. -gputhreads is not needed because it is ignored and hard-set to 256.



Theoretical calculation rate for GTX480: 1401*15/(3733/32+1194/16)=109 MHash/s (magic numbers are the count of operations from the binary for this implementation)



Mhh, in my theoretical calculation I assumed 109 MHash/s for a GTX480. I think I must check if my operation count for Fermi is right.

Can someone with a GTX480 run the code under Linux without -gpugrid?

InqBit



Offline



Activity: 27

Merit: 0









NewbieActivity: 27Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 09:01:59 AM

Last edit: April 08, 2013, 09:13:40 AM by InqBit #105 Quote from: psychocoder on April 08, 2013, 08:55:10 AM @InqBit: is 137694 khash/s the first shown hash rate? Please run the miner 5 min and than look to the hash rate. The first hash rate is always lower because the first kernel call takes very long.

I see you run the code with without -aggression, please add -aggression=8 or 7 to you parameter to get more performance. -gputhreads is not needed because it is ignored and hard set to 256.



theoretic calculation rate for GTX480: 1401*15/(3733/32+1194/16)=109 MHash/s (magic numbers are the count of operations from the binary for this implementation)



Mhh in my theoretic calculation I assumed 109 MHash/s for a GTX480. I think I must check if my operation count for fermi is right.

Can someone with a GTX480 run the code under linux without -gpugrid.



I just made the appropriate changes and started a new run. Will let you know in a few how it works out.



FWIW, I've been running ~140-145'ish on cgminer, overclocked to 800 core (700 stock). This is my general use machine, so I didn't have aggression set to keep desktop usable, but will run at 7 for testing for you. I just checked my average hashrate on cgwatcher, and it is 140.1.



edit: 10 minutes later



C:\miners\rpcminer nvopt>rpcminer-mod-cuda.exe -url=api.bitcoin.cz:8332 -user=user -password=pass -gpugrid=512 -gpu=0 -aggression=7





last 5 hashrates

140345 khash/s

140360 khash/s

140052 khash/s

139525 khash/s

140080 khash/s I just made the appropriate changes and started a new run. Will let you know in a few how it works out.FWIW, I've been running ~140-145'ish on cgminer, overclocked to 800 core (700 stock). This is my general use machine, so I didn't have aggression set to keep desktop usable, but will run at 7 for testing for you. I just checked my average hashrate on cgwatcher, and it is 140.1.edit: 10 minutes laterC:\miners\rpcminer nvopt>rpcminer-mod-cuda.exe -url=api.bitcoin.cz:8332 -user=user -password=pass -gpugrid=512 -gpu=0 -aggression=7last 5 hashrates140345 khash/s140360 khash/s140052 khash/s139525 khash/s140080 khash/s

gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 04:45:44 PM

Last edit: April 08, 2013, 05:50:57 PM by gateway #108 64 bit version tests:



my results.. for 680





C:\Users\gateway\Desktop\rpcminer-mod-x64\bin>rpcminer-mod-cuda.exe -url=btcguild.com:8332 -user=xxx -password=xxx -gpugrid=2048 -gputhreads=256 -gpu=0

Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 GPU GUDA device(s) found

CUDA initialized

Search Configuration for gpu named: GeForce GTX 680

Your device:

- 8 streaming multiprocessors (SM)

- can run sm version 300

- bitcoin miner is optimized for 5 gpugrids per SM

- max value for gpugrid parameter is 2147483647

max nonce 3FFFFFFF

Done allocating CUDA resources for (2048,256)

iterations on gpu=32 blocks=2048 threads=256

Iterations: 63,hashs per iteration 16777216

Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff





btw when I exit the app it kills my display driver..



my results for 560ti



C:\Users\stevet\Desktop\rpcminer-mod-x64\bin>rpcminer-mod-cuda.exe -url=btcguild.com:8332 -user=xxx -password=xxx -gpu=0

Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 GPU GUDA device(s) found

CUDA initialized

Search Configuration for gpu named: GeForce GTX 560 Ti

Your device:

- 8 streaming multiprocessors (SM)

- can run sm version 210

- bitcoin miner is optimized for 3 gpugrids per SM

- max value for gpugrid parameter is 65535

max nonce 3FFFFFFF

Autotuning is on because no gridsize is given

To big grid size fixed!!

Done allocating CUDA resources for (65535,256)

iterations on gpu=32 blocks=65535 threads=256

Iterations: 2,hashs per iteration 536862720

Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff

<src\cuda\bitcoinminercuda.cpp>:180[CUDA] Error: unknown error



app crashes..

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 06:25:43 PM #109 Quote from: gateway on April 08, 2013, 04:45:44 PM 64 bit version tests:



my results.. for 680





C:\Users\gateway\Desktop\rpcminer-mod-x64\bin>rpcminer-mod-cuda.exe -url=btcguild.com:8332 -user=xxx -password=xxx -gpugrid=2048 -gputhreads=256 -gpu=0

Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 GPU GUDA device(s) found

CUDA initialized

Search Configuration for gpu named: GeForce GTX 680

Your device:

- 8 streaming multiprocessors (SM)

- can run sm version 300

- bitcoin miner is optimized for 5 gpugrids per SM

- max value for gpugrid parameter is 2147483647

max nonce 3FFFFFFF

Done allocating CUDA resources for (2048,256)

iterations on gpu=32 blocks=2048 threads=256

Iterations: 63,hashs per iteration 16777216

Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff



Can you run the miner for some minutes so that we can see the hashrate? My driver under Windows also crashes, that's Windows-like shutdown^^



Quote from: gateway on April 08, 2013, 04:45:44 PM my results for 560ti



C:\Users\stevet\Desktop\rpcminer-mod-x64\bin>rpcminer-mod-cuda.exe -url=btcguild.com:8332 -user=xxx -password=xxx -gpu=0

Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 GPU GUDA device(s) found

CUDA initialized

Search Configuration for gpu named: GeForce GTX 560 Ti

Your device:

- 8 streaming multiprocessors (SM)

- can run sm version 210

- bitcoin miner is optimized for 3 gpugrids per SM

- max value for gpugrid parameter is 65535

max nonce 3FFFFFFF

Autotuning is on because no gridsize is given

To big grid size fixed!!

Done allocating CUDA resources for (65535,256)

iterations on gpu=32 blocks=65535 threads=256

Iterations: 2,hashs per iteration 536862720

Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff

<src\cuda\bitcoinminercuda.cpp>:180[CUDA] Error: unknown error



app crashes..



Please use -aggression=6 as parameter; the GPU is too slow and breaks the kernel run limit. Can you run the miner for some minutes so that we can see the hashrate? My driver under Windows also crashes, that's Windows-like shutdown^^ Please use -aggression=6 as parameter; the GPU is too slow and breaks the kernel run limit.

coastermonger



Offline



Activity: 367

Merit: 250



Find me at Bitrated







Sr. MemberActivity: 367Merit: 250Find me at Bitrated Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 08:10:58 PM #112 I've got a GTX 670 and 50miner (via poclbm or cgminer) gives me about 92 mH/s. I'm wanting to see what this does



I'm going to ask a real noob question here, but where do I extract the 7zip files to? Clicking on the .exe from the bin just causes it immediately open and then stop working, and I can't input any parameters. Bitrated user: Rees.

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 08:31:24 PM

Last edit: April 08, 2013, 08:41:34 PM by charliemaggot #114 Quote from: coastermonger on April 08, 2013, 08:10:58 PM I'm going to ask a real noob question here, but where do I extract the 7zip files to? Clicking on the .exe from the bin just causes it immediately open and then stop working, and I can't input any parameters.



You don't need to extract 7z files unless you are compiling it yourself in Visual Studio.



Otherwise you just run it from the bin folder:



e.g.



rpcminer-mod-cuda.exe -url=http://<yourpool>:<poolport> -user=<username> -password=<password> -gpu=0 -gputhreads=256 -aggression=8



where

<yourpool> is your pool address, e.g. 127.0.0.1 if mining solo or something like api.bitcoin.cz

<poolport> is your pool's port, normally 8332

<username> is your pool login

<password> .. you guessed it



If you get an error whilst it's running, it may be because the GPU processing is taking too long and by default Windows thinks the GPU has crashed after 5 seconds. So try reducing aggression, or add "-gpugrid=256" or "-gpugrid=512".



If you are still getting problems, can you include the error text you get.

You don't need to extract 7z files unless you are compiling it yourself in Visual Studio.Otherwise you just run it from the bin folder:e.g.rpcminer-mod-cuda.exe -url=http:// : -user= -password= -gpu=0 -gputhreads=256 -aggression=8where is your pool address, e.g. 127.0.0.1 if mining solo or something like api.bitcoin.cz is your pool's port, normally 8332 is your pool login .. you guessed itIf you get an error whilst it's running, it may because the GPU processing is taking too long and by default Windows thinks the GPU has crashed after 5 seconds. So try reducing aggression, or add "-gpugrid=256" or "-gpugrid=512".If you are still getting problems, can you include the error text you get.

gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 08, 2013, 09:14:00 PM #117

Quote What exactly are you seeing here? I've only got an Amazon EC2 instance to test on, and I don't get an issue. You're Ctrl-C-ing?







Yea im doing ctrl-c to exit out of the app in a dos prompt on win 7 Yea im doing ctrl-c to exit out of the app in a dos prompt on win 7

coastermonger



Offline



Activity: 367

Merit: 250



Find me at Bitrated







Sr. MemberActivity: 367Merit: 250Find me at Bitrated Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 09, 2013, 01:53:23 AM #119 Quote You don't need to extract 7z files unless you are compiling it yourself in Visual Studio.



Otherwise you just run it from the bin folder:



e.g.



rpcminer-mod-cuda.exe -url=http://<yourpool>:<poolport> -user=<username> -password=<password> -gpu=0 -gputhreads=256 -aggression=8



where

<yourpool> is your pool address, e.g. 127.0.0.1 if mining solo or something like api.bitcoin.cz

<poolport> is your pool's port, normally 8332

<username> is your pool login

<password> .. you guessed it



If you get an error whilst it's running, it may because the GPU processing is taking too long and by default Windows thinks the GPU has crashed after 5 seconds. So try reducing aggression, or add "-gpugrid=256" or "-gpugrid=512".



If you are still getting problems, can you include the error text you get.

I really appreciate you guys being so patient with me. But where exactly do I even input these commands? I do know how to make a batch file, I go to notepad, type the commands I want and save it as a .bat, drag the file in the folder and try to run it, but this doesn't seem to work. I really appreciate you guys being so patient with me. But where exactly do I even input these commands? I do know how to make a batch file, I go to notepad, type the commands I want and save it as a .bat, drag the file in the folder and try to run it, but this doesn't seem to work. Bitrated user: Rees.

coastermonger



Offline



Activity: 367

Merit: 250



Find me at Bitrated







Sr. MemberActivity: 367Merit: 250Find me at Bitrated Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 09, 2013, 02:08:06 AM #120 I know this is pretty typical stuff for you guys but this is one of the first times I've had to deal with batch files.



Initially it crashed, then I added the command -gpugrid 512 and it worked properly.

all the arguments I'm using are -gpu=0 -gputhreads=256 -aggression=8 -gpugrid=512



Normally, with a basic 50miner program my nvidia GTX 670 will max out at 92-93 MH/s, but now it seems to be pushing 96+ MH/s. I'm going to continue to tinker and see if I can push it higher. Okay! I actually got it to work properly. I created a batch file in the same folder and this time actually included the words rpcminer-mod-cuda.exe at the beginning haha. I know this is pretty typical stuff for you guys but this is one of the first times I've had to deal with batch files. Initially it crashed, then I added the command -gpugrid 512 and it worked properly. All the arguments I'm using are -gpu=0 -gputhreads=256 -aggression=8 -gpugrid=512. Normally, with a basic 50miner program my nvidia GTX 670 will max out at 92-93 MH/s, but now it seems to be pushing 96+ MH/s. I'm going to continue to tinker and see if I can push it higher. Bitrated user: Rees.

charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 09, 2013, 06:42:45 AM #122 Quote from: jeprokzki on April 09, 2013, 05:48:18 AM hi guys im watching this thread last day can you tell me how to download this? im interested to try it to my gtx660ti im current hashing 101.5mh/s via cgminer v.2,11,4



Go to https://github.com/cdmackie/rpcminer-mod



and either use git and checkout the repository, or just click the "Zip" button at the top to download it.



To run, you only need the files from the "bin" directory, so extract the whole thing or just that.



Open a command prompt, go into that bin directory, and run the command line:



rpcminer-mod-cuda.exe -url=http://<pool>:<port> -user=<username> -password=<password> -gpu -aggression=8



where <pool> and <port> are your pool server, e.g. api.bitcoin.cz:8332, and obviously <username> and <password>



If you get an error whilst it's running, it may be because the GPU processing is taking too long and by default Windows thinks the GPU has crashed after 5 seconds.



So try:

1) reducing aggression

2) add "-gpugrid=256" or "-gpugrid=512" (see which works and is best)

3) unplug the monitor from the card (if appropriate)

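4) adjust the Windows GPU timeout settings ( http://msdn.microsoft.com/en-us/library/windows/hardware/ff569918(v=vs.85).aspx ), e.g. TdrDelay to 20 and TdrDdiDelay to 60.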



Once its working, you can increase aggression and/or gpugrid to get best settings.

Go to https://github.com/cdmackie/rpcminer-mod and either use git and checkout the repository, or just click the "Zip" button at the top to download it. To run, you only need the files from the "bin" directory, so extract the whole thing or just that. Open a command prompt, go into that bin directory, and run the command line: rpcminer-mod-cuda.exe -url=http://<pool>:<port> -user=<username> -password=<password> -gpu -aggression=8 where <pool> and <port> are your pool server, e.g. api.bitcoin.cz:8332, and obviously <username> and <password>. If you get an error whilst it's running, it may be because the GPU processing is taking too long and by default Windows thinks the GPU has crashed after 5 seconds. So try: 1) reducing aggression 2) add "-gpugrid=256" or "-gpugrid=512" (see which works and is best) 3) unplug the monitor from the card (if appropriate) 4) adjust the Windows GPU timeout settings ( http://msdn.microsoft.com/en-us/library/windows/hardware/ff569918(v=vs.85).aspx ), e.g. TdrDelay to 20 and TdrDdiDelay to 60. Once it's working, you can increase aggression and/or gpugrid to get best settings.

Ephebus



Offline



Activity: 57

Merit: 0









NewbieActivity: 57Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 10, 2013, 07:25:17 AM

Last edit: April 10, 2013, 08:51:55 AM by Ephebus #136 Just tested the program mining solo with my GTS 450 (core and shaders overclocked to 950 MHz/1900 MHz, 1 GB GDDR5 memory underclocked to 900 MHz) on Windows XP 64 and it ran with no problems. I used this command line:



rpcminer-mod-cuda.exe -url=http://127.0.0.1 -user=XXX -password=XXX -gpu -aggression=1 -gpugrid=1024



Got ~48 MHash/s with those settings. I already had CUDA SDK 5.0 installed (both the 32-bit and the 64-bit versions). Only needed to download rpcminer-mod-cuda.exe, libeay32.dll and ssleay32.dll.



Client will start 1 miner threads

Work will be refreshed every 4000 ms

Target = 000000000000022fbe00000000000000000000000000000000000000000000

1 GPU GUDA device(s) found

Setting GPU GUDA device 0

CUDA initialized

Search Configuration for gpu named: GeForce GTS 450

Your device:

- 4 streaming multiprocessors (SM)

- can run sm version 210

- bitcoin miner is optimized for 3 gpugrids per SM

- max value for gpugrid parameter is 65535

max nonce 3FFFFFFF

Done allocating CUDA resources for (1024,256)

iterations on gpu=1 blocks=1024 threads=256

Iterations: 4095,hashs per iteration 262144

48007 khash/s

48120 khash/s

Found nonce 1F948E95

48112 khash/s

48129 khash/s

Found nonce 15DE29EC

48120 khash/s



EVGA Precision showed the card was using 99% of the GPU (that program only goes as high as 99% if I'm not mistaken), but the desktop was absolutely smooth as if nothing else was using the GPU.



Tried -aggression=5 but that only increased the hash rate by ~0.5 MHash/s and the desktop got laggy (but still usable), so I don't think that's worth it in my case. Same thing with -gpugrid - could increase it up to 4096 before the desktop started lagging a little. Hash rate with -gpugrid=4096 increased by ~1 MHash/s only.

Ephebus



Offline



Activity: 57

Merit: 0









NewbieActivity: 57Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 10, 2013, 09:19:32 AM #139 Quote from: psychocoder on April 10, 2013, 08:50:24 AM @Ephebus :



You get better performance if you set -aggression= and -gpugrid to bigger values, but then your desktop will not be smooth. Have you also tested another miner?



Yes, I was editing the post with my experiments with -gpugrid when you posted. I could get it up to 4096 before the desktop started lagging a little for an increase of ~1 MHash/s. Not worth it in my case, but will probably make a much bigger difference with more powerful cards.



Also, lowering -gpugrid to 256 the hash rate only dropped to ~47 MHashes/s and I could play encoded XviD and X264 content smoothly at full screen, and was even able to join my Wolfenstein: Enemy Territory server and play with my normal maximum settings at 1680x1050 and 125 FPS without a hitch. Hash rate dropped to ~25 MHash/s while playing though.



With poclbm I can reach ~39 MHash/s at those clock settings but at the expense of an extremely laggy desktop. I never use the normal rpcminer-cuda because it almost freezes my desktop no matter what settings I use and always yields lower hash rates than poclbm. Conclusion: rpcminer-mod-cuda increased my hash rate by over 20% while keeping a completely smooth desktop and even allowed me to play my favorite game while still keeping a good enough hash rate for the GTS 450. Yes, I was editing the post with my experiments with -gpugrid when you posted. I could get it up to 4096 before the desktop started lagging a little for an increase of ~1 MHash/s. Not worth it in my case, but will probably make a much bigger difference with more powerful cards. Also, lowering -gpugrid to 256 the hash rate only dropped to ~47 MHashes/s and I could play encoded XviD and X264 content smoothly at full screen, and was even able to join my Wolfenstein: Enemy Territory server and play with my normal maximum settings at 1680x1050 and 125 FPS without a hitch. Hash rate dropped to ~25 MHash/s while playing though. With poclbm I can reach ~39 MHash/s at those clock settings but at the expense of an extremely laggy desktop. I never use the normal rpcminer-cuda because it almost freezes my desktop no matter what settings I use and always yields lower hash rates than poclbm. Conclusion: rpcminer-mod-cuda increased my hash rate by over 20% while keeping a completely smooth desktop and even allowed me to play my favorite game while still keeping a good enough hash rate for the GTS 450.

gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 11, 2013, 04:59:59 PM #146 So the thread says going from 134 - 330.. I have yet to see any sort of leap from using other miners or rpcminer at default. I realize that people are working on functions to try to improve that and their efforts are greatly appreciated but, I think all of us would like to see a nice bump in our mining with our nvidia cards.



thoughts?

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 11, 2013, 07:22:06 PM #147 Quote from: gateway on April 11, 2013, 04:59:59 PM So the thread says going from 134 - 330.. I have yet to see any sort of leap from using other miners or rpcminer at default. I realize that people are working on functions to try to improve that and their efforts are greatly appreciated but, I think all of us would like to see a nice bump in our mining with our nvidia cards.



thoughts?



134 MHash/s to 330 MHash/s is for a Kepler K20 or the consumer version Titan. A big jump with old NVIDIA GPUs is not possible. CGMiner is very near to the maximum power of old NVIDIA GPUs. All GPUs before Titan have no funnel-shift operator and need 3 operations for a rotate; the new ones need only one operation. 134 MHash/s to 330 MHash/s is for a Kepler K20 or the consumer version Titan. A big jump with old NVIDIA GPUs is not possible. CGMiner is very near to the maximum power of old NVIDIA GPUs. All GPUs before Titan have no funnel-shift operator and need 3 operations for a rotate; the new ones need only one operation.

gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 11, 2013, 07:40:49 PM #148 Quote from: psychocoder on April 11, 2013, 07:22:06 PM Quote from: gateway on April 11, 2013, 04:59:59 PM So the thread says going from 134 - 330.. I have yet to see any sort of leap from using other miners or rpcminer at default. I realize that people are working on functions to try to improve that and their efforts are greatly appreciated but, I think all of us would like to see a nice bump in our mining with our nvidia cards.



thoughts?



134 MHash/s to 330 MHash/s is for a Kepler K20 or the consumer version Titan. A big jump with old NVIDIA GPUs is not possible. CGMiner is very near to the maximum power of old NVIDIA GPUs. All GPUs before Titan have no funnel-shift operator and need 3 operations for a rotate; the new ones need only one operation.

134 MHash/s to 330 MHash/s is for a Kepler K20 or the consumer version Titan. A big jump with old NVIDIA GPUs is not possible. CGMiner is very near to the maximum power of old NVIDIA GPUs. All GPUs before Titan have no funnel-shift operator and need 3 operations for a rotate; the new ones need only one operation.

Ahh ok sorry my bad, thanks for the clarification.. so you're saying the 690's and below will still be somewhat limited or the same speed without much boost?



I have a gtx 680.. Ahh ok sorry my bad, thanks for the clarification.. so you're saying the 690's and below will still be somewhat limited or the same speed without much boost? I have a gtx 680..

gateway



Offline



Activity: 551

Merit: 500







Hero MemberActivity: 551Merit: 500 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 12, 2013, 02:08:31 AM #150 but it's not my main mining rig, it just adds to my pool from my other cards.. So the max I have been able to push my 680 without oc is between 120-130.. mostly low 120's. I would love to even add 100 more, but it's not my main mining rig, it just adds to my pool from my other cards..

slon_ru



Offline



Activity: 24

Merit: 0







NewbieActivity: 24Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 16, 2013, 08:57:47 AM

Last edit: April 16, 2013, 10:26:25 AM by slon_ru #155 Hi charliemaggot !



I have nv670 - 100 m.hash/sec with GUIminer.



If i run rpcminer-mod-cuda.exe with GUIminer i get:



Faulting application name: rpcminer-mod-cuda.exe, version: 0.0.0.0, time stamp: 0x5163a5d3

Faulting module name: MSVCR100.dll, version: 10.0.40219.325, time stamp: 0x4df2bcac

Exception code: 0x40000015

Faulting application path: D:\bitcoin\miner\guiminer\miners\puddinpop\bk\rpcminer-mod-cuda.exe

Faulting module path: D:\bitcoin\miner\guiminer\miners\puddinpop\MSVCR100.dll

Report Id: b1683d16-a64f-11e2-b5bb-5404a61bb659



Display driver nvlddmkm stopped responding and has successfully recovered.







With command line i get:



"Could not retrieve work from RPC server.

CURL return value = 22"



I set TdrDelay and TdrDdiDelay = 10,60.



Can you help me to run rpcminer-mod-cuda.exe ?



br





charliemaggot



Offline



Activity: 79

Merit: 10









MemberActivity: 79Merit: 10 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 16, 2013, 09:54:07 PM #157



CURL error 22 is an HTTP error from the server, is your pool working ok?



If you reduce -gpugrid down to 256 or 512, do you still get the error?



Lastly, please try downloading and running the files from the debug branch, which will hopefully give more information on the crash:



https://github.com/cdmackie/rpcminer-mod/tree/Debug/bin



Again, post any errors. @slon_ru can you post the manual command line (hide your username/password) that causes the crash, and the output you are getting. CURL error 22 is an HTTP error from the server, is your pool working ok? If you reduce -gpugrid down to 256 or 512, do you still get the error? Lastly, please try downloading and running the files from the debug branch, which will hopefully give more information on the crash: https://github.com/cdmackie/rpcminer-mod/tree/Debug/bin Again, post any errors.

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 19, 2013, 05:45:58 AM #170 Quote from: Aggrophobia on April 18, 2013, 06:12:54 PM i have a GTX Titan and i got only 360MHash/s with 1150mhz gpu clock, is it possible that i get more MHashes with linux ?



changing -gputhreads=x has no effect on the rpcminer :/ it stays by 256



Can you please add the full name of your graphics card, or post a link to the shop and product where you bought the GPU? I think there is no GTX Titan with 1150 MHz. By my theoretical calculation a GTX Titan at 1150 MHz should get 500+ MHash/s. Please post the first 20 lines of output from rpcminer; that helps me see why your hashrate is so low.



And yes, the parameter -gputhreads=x is disabled because the kernels are compiled for 256 threads only. Can you please add the full name of your graphics card, or post a link to the shop and product where you bought the GPU? I think there is no GTX Titan with 1150 MHz. By my theoretical calculation a GTX Titan at 1150 MHz should get 500+ MHash/s. Please post the first 20 lines of output from rpcminer; that helps me see why your hashrate is so low. And yes, the parameter -gputhreads=x is disabled because the kernels are compiled for 256 threads only.

z1ppy



Offline



Activity: 12

Merit: 0









NewbieActivity: 12Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 19, 2013, 07:51:38 PM #175 Quote from: gateway on April 11, 2013, 04:59:59 PM So the thread says going from 134 - 330.. I have yet to see any sort of leap from using other miners or rpcminer at default. I realize that people are working on functions to try to improve that and their efforts are greatly appreciated but, I think all of us would like to see a nice bump in our mining with our nvidia cards.



thoughts?



I saw you mention 560 Ti earlier. That's my card. What hash rate did you see before/after this mod? I'm getting 80-90 mh/s now using GUIMiner w/ rpcminer-cuda.exe but would welcome any additional boost if possible.



If this is about the best I can do with current card, I will likely do a new build + move to Litecoin soon.



thanks. I saw you mention 560 Ti earlier. That's my card. What hash rate did you see before/after this mod? I'm getting 80-90 mh/s now using GUIMiner w/ rpcminer-cuda.exe but would welcome any additional boost if possible. If this is about the best I can do with current card, I will likely do a new build + move to Litecoin soon. thanks

3devilred



Offline



Activity: 37

Merit: 0







NewbieActivity: 37Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA April 30, 2013, 08:41:47 AM #193 I'm undermining of coinotron with protocol PPcoin and I noticed a problem, I bring my gtx 560 to 918 mhz and I get 80000 kh / s on the site but I have always been a rate ' of 57.8 mh / s, sometimes I also marks more than 200 mh / s. and ' a problem with the site, or mine? ? thanks



edit: I forgot, many times on the site tells me 0 mh / s.rpcminer but taxis always on 79/80000 kh / s

bitcoiner49er



Offline



Activity: 457

Merit: 250









Sr. MemberActivity: 457Merit: 250 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA May 03, 2013, 08:25:49 PM #196



Code: rpcminer-mod-cuda.exe -url=http

://api.bitcoin.cz:8332 -user=[user] -password=[pasword] -gpu -gputhreads=

256 -aggression=11

Client will start 1 miner threads

Work will be refreshed every 4000 ms

1 GPU GUDA device(s) found

Setting GPU GUDA device 0

CUDA initialized

Search Configuration for gpu named: Quadro FX 580

Your device:

- 4 streaming multiprocessors (SM)

- can run sm version 110

- bitcoin miner is optimized for 2 gpugrids per SM

- max value for gpugrid parameter is 65535

max nonce 3FFFFFFF

Autotuning is on because no gridsize is given

Done allocating CUDA resources for (4088,256)

iterations on gpu=1024 blocks=4088 threads=256

Iterations: 1,hashs per iteration 1071644672

Target = 00000000ffffffffffffffffffffffffffffffffffffffffffffffffffffff

7596697781 khash/s

7584271118 khash/s

7730650780 khash/s

7673471550 khash/s

7482086041 khash/s

7757530201 khash/s

7527873405 khash/s

7349241955 khash/s

7549436565 khash/s

7698070378 khash/s

7347093133 khash/s

7342058776 khash/s

7063781576 khash/s Should I see some other confirmation of hashes? Homo doctus is se semper divitias habet

psychocoder



Offline



Activity: 50

Merit: 0







NewbieActivity: 50Merit: 0 Re: NVIDIA Kepler (K20) from 134MHash/s to 330MHash/s with CUDA May 13, 2013, 06:29:50 AM #200 Quote from: mhps on May 05, 2013, 02:29:11 PM

Code: Found nonce 3FD7CA5E

2013-05-05 13:54:53 Found Hash!

within a period of 100 minutes. But coinotron only reported 19 shares. What is really strange is that the average kh/s reported by coinotron agrees with the hashrate printed by the miner on average. That means the rate at which my miner finds shares seen by the pool is correct, but the "Found Hash! " is 7-8 times too often. Why is that?



EDIT: earlier port #

I am using this to mine PPCoin on coinotron. The setup is the same as in my post #195 . There were 144 of this within a period of 100 minutes. But coinotron only reported 19 shares. What is really strange is that the average kh/s reported by coinotron agrees with the hashrate printed by the miner on average. That means the rate at which my miner finds shares seen by the pool is correct, but the "Found Hash! " is 7-8 times too often. Why is that? EDIT: earlier port #

I have never tested the miner with PPCoin. It looks like many rejected hashes. I have never tested the miner with PPCoin. It looks like many rejected hashes.