Time and Timers
Portions of these overheads taken from: Unix Systems Programming Communication, Concurrency, and Threads, by Robbins and Robbins
Time and Timers Portions of these overheads taken from: Unix Systems - - PowerPoint PPT Presentation
Time and Timers Portions of these overheads taken from: Unix Systems Programming Communication, Concurrency, and Threads , by Robbins and Robbins Why Time? What time is it? Arrange for event execution at (after) time t? Which portion
Portions of these overheads taken from: Unix Systems Programming Communication, Concurrency, and Threads, by Robbins and Robbins
What time is it? Arrange for event execution at (after) time t? Which portion of code should be optimized? How can I get make to operate correctly
1.
2.
3.
4.
POSIX specifies systems should keep time in
Each day is 86,400 seconds Epoch: 00:00 (midnight) January 1, 1970
Extensions address higher resolutions
#include <time.h> time_t time(time_t *T); On success, returns seconds since Epoch; -1 otherwise If T not null, also stored in T #include <time.h> double difftime(time_t time1, time_t time0); Returns number of seconds elapsed between time0 and time1, represented as a double. time1, time0 are seconds since Epoch.
Hardware Clock
Runs independently of any control program running on
CPU
Battery powered; runs even while system powered down hwclock(8) to query/set
System Time
Seconds since epoch Kept via clock in Linux kernel
Software counter based on timer interrupt
Initialized using hardware clock on boot date(1) to query/set
Clocks drift over time
Ci(t) denotes value of systems i’s clock at time t Drift rate – relative amount clock rate differs from
(Ci(t1)-Ci(t2)) / (t1-t2)
|Ci(t)-Cj(t)| is clock skew Skew commonly unbounded
Network Time Protocol (NTP)
Protocol to synchronize local clock to external reference
Time server, radio clock, GPS receiver
Clocks synchronized to each other via mutual
synchronization to reference
Avoids discontinuities in clock values
GPS hardware
Allows tight synchronization (nanoseconds) to UTC No network connectivity required Need antenna
char *asctime(const struct tm *timeptr) char *ctime(const time_t *clock) struct tm *gmtime(const time_t *timer) struct tm *localtime(const time_t *timer)
Impact of clock resolution Available resolution
Time Clock Value
Single interval measurement
Off by +/- resolution interval ε
Multiple interval measurements
No limit; +/- ε with each measurement
for (i=0;i<MAX;i++){
add to total }
Total: ε + ε/2 + ε − ε/3 + ε + ε + ε …
Over/under estimates will tend to balance out
struct timeval{ time_t tv_sec; /* seconds since Epoch */ time_t tv_usec; /* and microseconds */ } #include <sys/time.h> int gettimeofday(struct timeval *tp, void *tzp);
Returns zero; tzp must be NULL; tp holds retrieved time Check your system, some do not have microsecond resolution
#define MILLION 1000000L int main(void) { long timedif; struct timeval tpend, tpstart; if (gettimeofday(&tpstart, NULL)) { fprintf(stderr, "Failed to get start time\n"); return 1; } // function_to_time(); sleep(1); if (gettimeofday(&tpend, NULL)) { fprintf(stderr, "Failed to get end time\n"); return 1; } timedif = MILLION*(tpend.tv_sec - tpstart.tv_sec) + tpend.tv_usec - tpstart.tv_usec; printf("The function_to_time took %ld microseconds\n", timedif); return 0; }
[jmayo@asimov ~/codeExamples]$ ./gettimeofdaytiming The function_to_time took 1002038 microseconds [jmayo@asimov ~/codeExamples]$ ./gettimeofdaytiming The function_to_time took 1002047 microseconds [jmayo@asimov ~/codeExamples]$ ./gettimeofdaytiming The function_to_time took 1002046 microseconds
Read Time Stamp Counter (RDTSC) Instruction
Returns clock cycles since power up or reset
#seconds = #cycles/frequency
Two byte instruction: 0F 31 Returns a 64-bit count in EDX:EAX 2^32 cycles/(2,000,000,000HZ)=2.1475 sec 2^64 cycles/(2,000,000,000HZ)=9223372035 sec
153722867.3 min 292.4712 years
rdtsc ; read time stamp mov time, eax ; move ctr into variable fdiv ; floating point divide rdtsc ; read time stamp sub eax, time ; find the difference cpuid ; prior inst must finish rdtsc ; read time stamp mov time, eax ; move ctr into variable fdiv ; floating point divide cpuid ; rdtsc ; read time stamp sub eax, time ; find the difference
/* Typedef for a 64-bit wide unsigned int */ typedef struct { unsigned long low; unsigned long high; } UINT64; /* Read Timer Stamp Counter */ UINT64 rdtsc(void) { UINT64 x; __asm__ volatile(".byte 0x0f,0x31" : "=a" (x.low), "=d" (x.high)); return x; } /* Read Timer Stamp Counter */ unsigned long long int rdtsc(void) { unsigned long long int x; __asm__ volatile(".byte 0x0f,0x31" : "=A" (x)); return x; }
/* Read Timer Stamp Counter */ unsigned long long int rdtsc(void) { unsigned long long int x; __asm__ volatile(".byte 0x0f,0x31" : "=A" (x)); return x; } main(){ unsigned long long llstart,llend; llstart=rdtsc(); /*They say you must do this*/ llstart=rdtsc(); llstart=rdtsc(); sleep(1); llend=rdtsc(); printf("Long long <%llu>\n",llend-llstart); }
asimov% ./rdtsc Long long <2605195700> asimov% ./rdtsc Long long <2604964942> asimov% cat /proc/cpuinfo
processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Pentium(R) 4 CPU 2.60GHz stepping : 9 cpu MHz : 2593.704 cache size : 512 KB physical id : 0 siblings : 2 fdiv_bug : no …
fortran% ./rdtsc Long long <3010263870> jmayo% ./rdtsc Long long <3012787470> jmayo% cat /proc/cpuinfo
processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 3 model name : Intel(R) Pentium(R) 4 CPU 3.00GHz stepping : 3 cpu MHz : 2992.782 cache size : 1024 KB physical id : 0 siblings : 2 fdiv_bug : no …
Impact of multiprocessing Measurement interface
OS on behalf of P1 OS on behalf of P1
Wall Clock Time Wall Clock Time
Idle Idle
1.
Process time: executing instructions in any user process
2.
System time: execution operating system instructions
3.
Idle time: no process running
1.
User time: executing instructions in particular user process +
2.
System time: operating system executing on behalf of particular process
Wall clock time is system time + user time + idle time
Include time process is not execution
TSC measures wall clock time
struct tms{ clock_t tms_utime; /* user CPU time of process*/ clock_t tms_stime; /* system CPU time of process */ clock_t tms_cutime; /* utime for process & term. children */ clock_t tms_cstime; /* stime for process & term. children */ } #include <sys/time.h> int times(struct tms *buffer);
The tms__utime field contains the CPU time spent executing instructions of the calling process. The tms__stime field contains the CPU time spent in the system while executing tasks on behalf of the calling process. The tms__cutime field contains the sum of the tms__utime and tms__cutime values for all waited-for terminated children. The tms__cstime field contains the sum of the tms__stime and tms__cstime values for all waited-for terminated children. Times for terminated children (and their descendants) is added in at the moment wait(2) or waitpid(2) returns their process ID. In particular, times of grandchildren that the children did not wait for are never seen.
All times reported are in clock ticks. The function times returns the number of clock ticks that have Elapsed since an arbitrary point in the past. For Linux this point is the moment the system was booted. This return value may overflow the possible range of type clock_t. On error, (clock_t) -1 is returned, and errno is set appropriately. The number of clock ticks per second can be obtained using sysconf(_SC_CLK_TCK); In POSIX-1996 the symbol CLK_TCK (defined in_<time.h>) is mentioned as obsolescent. It is obsolete now. On Linux, if the disposition of SIGCHLD is set to SIG__IGN then the times of terminated children are automatically included in the tms__cstime and tms__cutime fields, although POSIX 1003.1-2001 says that this should only happen if the calling process wait()s on its children.
#include <unistd.h> main(){ long ticks; ticks=sysconf(_SC_CLK_TCK); printf("<%ld>\n", ticks); }
jmayo@asimov% getTicks <100>
double clockticks; double cticks; struct tms tmend; if (argc < 2){ fprintf (stderr, "Usage: %s command\n", argv[0]); return 1; } if ((child = fork()) == -1) { perror("Failed to fork"); return 1; } if (child == 0) { execvp(argv[1], &argv[1]); perror("Child failed to execvp the command"); return 1; } if (wait(NULL) == -1) { perror("Failed to wait for child"); return 1; }
if (times(&tmend) == (clock_t)-1) { perror("Failed to get end time"); return 1; } if ((clockticks = (double) sysconf(_SC_CLK_TCK)) == -1) { perror("Failed to determine clock ticks per second"); return 1; } if (clockticks == 0) { fprintf(stderr, "Invalid number of ticks per second\n"); return 1; } cticks = tmend.tms_cutime + tmend.tms_cstime
printf("%s used %ld clock ticks or %f seconds\n", argv[1], (long)cticks, cticks/clockticks); return 0; }
#define MAX 900000000 main(int argc, char **argv){ int x; int i; for (i=0,x=0; i<MAX; i++){x++;} } [jmayo@asimov ~/testCode]$ ./timechild loop loop used 209 clock ticks or 2.090000 seconds
#include <sys/types.h> #include <sys/wait.h> #define MAX 900000000 main(int argc, char **argv){ int x; int i; int cpid; for (i=0,x=0; i<MAX; i++){x++;} cpid=fork(); if (cpid==0){ execlp("loop","loop",(char *)0); } wait(&cpid); } [jmayo@asimov ~/testCode]$ ./timechild deepLoop deepLoop used 417 clock ticks or 4.170000 seconds
TIME(1) TIME(1) NAME time - time a simple command or give resource usage SYNOPSIS time [options] command [arguments...] DESCRIPTION The time command runs the specified program command with the given
tics consist of (i) the elapsed real time between invocation and termi- nation, (ii) the user CPU time (the sum of the tms_utime and tms_cutime values in a struct tms as returned by times(2)), and (iii) the system CPU time (the sum of the tms_stime and tms_cstime values in a struct tms as returned by times(2)).
/* Read Timer Stamp Counter */ unsigned long long int rdtsc(void) { unsigned long long int x; __asm__ volatile(".byte 0x0f,0x31" : "=A" (x)); return x; } main(){ unsigned long long llstart,llend; llstart=rdtsc(); /*They say you must do this*/ llstart=rdtsc(); llstart=rdtsc(); sleep(1); llend=rdtsc(); printf("Long long <%llu>\n",llend-llstart); }
[jmayo@asimov ~/codeExamples]$ /usr/bin/time -p ./rdtsc Long long <2597759140> real 1.00 user 0.00 sys 0.00 [jmayo@asimov ~]$ cat /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Pentium(R) 4 CPU 2.60GHz stepping : 9 cpu MHz : 2594.060
#define MAX 500000000 void longLoop(){ int i,x; for (i=0;i<MAX;i++){x++;} } main(){ unsigned long long llstart,llend; llstart=rdtsc(); llstart=rdtsc(); llstart=rdtsc(); // sleep(1); longLoop(); llend=rdtsc(); printf("Long long <%llu>\n",llend-llstart); }
[jmayo@asimov ~/codeExamples]$ /usr/bin/time -p rdtsc2 Long long <3082973544> real 1.19 user 1.18 sys 0.00 [jmayo@asimov ~]$ cat /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 15 model : 2 model name : Intel(R) Pentium(R) 4 CPU 2.60GHz stepping : 9 cpu MHz : 2594.060 3082973544/2594060000 = 1.1885
Timers and clocks Interval timers Process suspension
Clock – counter that increments at fixed
Timer – generates notification at a specified
Usually decrements value and generates signal at
Coupled with ITIMER_VIRTUAL, this timer is usually used to
profile the time spent by the application in user and kernel space
#include <sys/time.h> int getitimer(int which, struct itimerival *value); int setitimer(int which, const struct itimerval *value, struct itimervalue *ovalue); struct itimerval { struct timeval it_interval; /* next value */ struct timeval it_value; /* current value */ }; struct timeval { long tv_sec; /* seconds */ long tv_usec; /* microseconds */ }; The function getitimer fills the structure indicated by value with the current setting for the timer indicated by which (one of ITIMER_REAL,ITIMER_VIRTUAL, or ITIMER_PROF). The element it_value is set to the amount of time remaining on the timer, or zero if the timer is disabled. Similarly, it_interval is set to the reset value.
The function setitimer sets the indicated timer to the value in value. If
Timers decrement from it_value to zero, generate a signal, and reset to it_interval. A timer which is set to zero (it_value is zero or the timer expires and it_interval is zero) stops. Both tv_sec and tv_usec are significant in determining the duration of a timer. Timers will never expire before the requested time, instead expiring some short, constant time afterwards, dependent on the system timer resolution (currently 10ms). Upon expiration, a signal will be generated and the timer reset. If the timer expires while the process is active (always true for ITIMER_VIRT) the signal will be delivered immediately when generated. Otherwise the delivery will be offset by a small time dependent on the system loading.
static void myhandler(int s) { char aster = '*'; int errsave; errsave = errno; write(STDERR_FILENO, &aster, 1); errno = errsave; } static int setupinterrupt(void) { struct sigaction act; act.sa_handler = myhandler; act.sa_flags = 0; return (sigemptyset(&act.sa_mask) || sigaction(SIGPROF, &act, NULL)); }
static int setupitimer(void) { struct itimerval value; value.it_interval.tv_sec = 2; value.it_interval.tv_usec = 0; value.it_value = value.it_interval; return (setitimer(ITIMER_PROF, &value, NULL)); } int main(void) { if (setupinterrupt() == -1) { perror("Failed to set up handler for SIGPROF"); return 1; } if (setupitimer() == -1) { perror("Failed to set ITIMER_PROF interval timer"); return 1; } for ( ; ; ); }
[jmayo@asimov]$ gcc -o periodicasterisk periodicasterisk.c [jmayo@asimov]$ ./periodicasterisk *************************************************** [jmayo@asimov]$
#include <unistd.h> unsigned sleep(unsigned seconds);
Calling process is suspended until specified number of seconds has elapsed or until signal is caught sleep() may be implemented using SIGALRM; mixing calls to alarm() and sleep() is a bad idea.
#include <time.h> int nanosleep(const struct timespec *rqtp, const struct timespec *rtmp);
Calling process suspends execution until time interval specified by rqtp has elapsed or until thread receives a signal. If signal received and rmtp is not NULL, remaining time written to rtmp. Compared to sleep(3) and usleep(3), nanosleep has the advantage of not affecting any signals,
The structure timespec is used to specify intervals of time with nanosecond precision. It is specified in <time.h> and has the form struct timespec { time_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ }; The current implementation of nanosleep is based on the normal kernel timer mechanism, which has a resolution of 1/HZ s (i.e, 10 ms on Linux/i386 and 1 ms on Linux/Alpha). Therefore, nanosleep pauses always for at least the specified time, however it can take up to 10 ms longer than specified until the process becomes runnable again. For the same reason, the value returned in case of a delivered signal in *rem is usually rounded to the next larger multiple of 1/HZ s
#define COUNT 100 #define D_BILLION 1000000000.0 #define D_MILLION 1000000.0 #define MILLION 1000000L #define NANOSECONDS 1000 int main(void) { int i; struct timespec slptm; long tdif; struct timeval tend, tstart; slptm.tv_sec = 0; slptm.tv_nsec = NANOSECONDS; if (gettimeofday(&tstart, NULL) == -1) { fprintf(stderr, "Failed to get start time\n"); return 1; } for (i = 0; i < COUNT; i++) if (nanosleep(&slptm, NULL) == -1) { perror("Failed to nanosleep"); return 1; }
if (gettimeofday(&tend, NULL) == -1) {
fprintf(stderr, "Failed to get end time\n"); return 1; } tdif = MILLION*(tend.tv_sec - tstart.tv_sec) + tend.tv_usec - tstart.tv_usec; printf("%d nanosleeps of %d nanoseconds\n", COUNT, NANOSECONDS); printf("Should take %11d microseconds or %f seconds\n", NANOSECONDS*COUNT/1000, NANOSECONDS*COUNT/D_BILLION); printf("Actually took %11ld microseconds or %f seconds\n", tdif, tdif/D_MILLION); printf("Number of seconds per nanosleep was %f\n", (tdif/(double)COUNT)/MILLION); printf("Number of seconds per nanosleep should be %f\n", NANOSECONDS/D_BILLION); return 0; }
100 nanosleeps of 1000 nanoseconds Should take 100 microseconds or 0.000100 seconds Actually took 1995703 microseconds or 1.995703 seconds Number of seconds per nanosleep was 0.019957 Number of seconds per nanosleep should be 0.000001 100 nanosleeps of 1000 nanoseconds Should take 100 microseconds or 0.000100 seconds Actually took 1990988 microseconds or 1.990988 seconds Number of seconds per nanosleep was 0.019910 Number of seconds per nanosleep should be 0.000001 100 nanosleeps of 1000 nanoseconds Should take 100 microseconds or 0.000100 seconds Actually took 1997636 microseconds or 1.997636 seconds Number of seconds per nanosleep was 0.019976 Number of seconds per nanosleep should be 0.000001 100 nanosleeps of 1000 nanoseconds Should take 100 microseconds or 0.000100 seconds Actually took 1990884 microseconds or 1.990884 seconds Number of seconds per nanosleep was 0.019909 Number of seconds per nanosleep should be 0.000001
int counter(int cnt){ int i,x; for (i=0;i<cnt;i++) { /* BUSY WAIT */ x++; /* LENGTH DETERMINED BY cnt */ } } int Two(int cnt){ /* PASS THROUGH TO counter() */ counter(cnt); } /* argv[1] determines time in counter() on each call */ /* argv[2] is times counter() called directly from main */ /* argv[3] is number of times counter called through Two() */ main(int argc, char **argv){ int i; for (i=0;i<atoi(argv[2]);i++) counter(atoi(argv[1])); for (i=0;i<atoi(argv[3]);i++){ Two(atoi(argv[1]));} }
900000 increments in counter() each call call counter() 50 times from main() call counter() 200 times through Two()
_____________________________________________________________
jmayo% gcc –pg –o calls calls.c jmayo% ./calls 900000 50 200
Flat profile: Each sample counts as 0.01 seconds. % cumulative self self total time seconds seconds calls ms/call ms/call name 100.00 0.51 0.51 250 2.04 2.04 counter 0.00 0.51 0.00 200 0.00 2.04 Two
% the percentage of the total running time of the time program used by this function.
self the number of seconds accounted for by this seconds function alone. This is the major sort for this listing. calls the number of times this function was invoked, if this function is profiled, else blank. self the average number of milliseconds spent in this ms/call function per call, if this function is profiled, else blank. total the average number of milliseconds spent in this ms/call function and its descendents per call, if this function is profiled, else blank.
int counter( int cnt){ int i,x; for (i=0;i<cnt;i++){ x++; } } int main(int argc, char **argv) { long timedif; struct timeval tpend, tpstart; if (gettimeofday(&tpstart, NULL)) { fprintf(stderr, "Failed to get start time\n"); return 1;} counter(atoi(argv[1])); if (gettimeofday(&tpend, NULL)) { fprintf(stderr, "Failed to get end time\n"); return 1;} timedif = MILLION*(tpend.tv_sec - tpstart.tv_sec) + tpend.tv_usec - tpstart.tv_usec; printf("The function_to_time took %ld microseconds\n", timedif); return 0; }
jmayo% ./gettimeofdaytiming 900000 The function_to_time took 2101 microseconds jmayo% ./gettimeofdaytiming 900000 The function_to_time took 2091 microseconds jmayo% ./gettimeofdaytiming 900000 The function_to_time took 2090 microseconds
Call graph
granularity: each sample hit covers 4 byte(s) for 1.96%
index % time self children called name 0.10 0.00 50/250 main [2] 0.41 0.00 200/250 Two [3] [1] 100.0 0.51 0.00 250 counter [1]
[2] 100.0 0.00 0.51 main [2] 0.00 0.41 200/200 Two [3] 0.10 0.00 50/250 counter [1]
[3] 80.0 0.00 0.41 200 Two [3] 0.41 0.00 200/250 counter [1]
Each entry in this table consists of several lines. The line with the index number at the left hand margin lists the current function.The lines above it list the functions that called this function,and the lines below it list the functions this one called. This line lists: index A unique number given to each element
to every function name so it is easier to look up where the function in the table. % time This is the percentage of the `total' time that was spent in this function and its children. Note that due to different viewpoints, functions excluded by options, etc, these numbers will NOT add up to 100%. self This is the total amount of time spent in this function.
Children This is the total amount of time propagated into this function by its children. Called This is the number of times the function was
the number only includes non-recursive calls, and is followed by a `+' and the number of recursive calls. name The name of the current function. The index number is printed after it. If the function is a member of a cycle, the cycle number is printed between the function's name and the index number.
For the function's parents, the fields have the following meanings: self This is the amount of time that was propagated directly from the function into this parent. children This is the amount of time that was propagated from the function's children into this parent. called This is the number of times this parent called the function `/' the total number of times the function was called. Recursive calls to the function are not included in the number after the `/'. name This is the name of the parent. The parent's index number is printed after it. If the parent is a member of a cycle, the cycle number is printed between the name and the index number.
For the function's children, the fields have the following meanings: self This is the amount of time that was propagated directly from the child into the function. Children This is the amount of time that was propagated from the child's children to the function. called This is the number of times the function called this child `/' the total number of times the child was called. Recursive calls by the child are not listed in the number after the `/'. name This is the name of the child. The child's index number is printed after it. If the child is a member of a cycle, the cycle number is printed between the name and the index number.
If there are any cycles (circles) in the call graph, there is an entry for the cycle-as-a-whole. This entry shows who called the cycle (as parents) and the members of the cycle (as children.) The `+' recursive calls entry shows the number of function calls that were internal to the cycle, and the calls entry for each member shows, for that member, how many times it was called from other members of the cycle.
#include <stdio.h> #include <sys/time.h> #define MILLION 1000000L void function_to_time(void); int main(void) { long timedif; struct timeval tpend; struct timeval tpstart; if (gettimeofday(&tpstart, NULL)) { fprintf(stderr, "Failed to get start time\n"); return 1; } function_to_time(); /* timed code goes here */ if (gettimeofday(&tpend, NULL)) { fprintf(stderr, "Failed to get end time\n"); return 1; } timedif = MILLION*(tpend.tv_sec - tpstart.tv_sec) + tpend.tv_usec - tpstart.tv_usec; printf("The function_to_time took %ld microseconds\n", timedif); return 0;
}
if (gettimeofday(&tlast, NULL)) {
fprintf(stderr, "Failed to get first gettimeofday.\n"); return 1; } while (numdone < NUMDIF) { numcalls++; if (gettimeofday(&tthis, NULL)) { fprintf(stderr, "Failed to get a later gettimeofday.\n"); return 1; } timedif[numdone]= MILLION*(tthis.tv_sec - tlast.tv_sec) + tthis.tv_usec - tlast.tv_usec; if (timedif[numdone] != 0) { numdone++; tlast = tthis; } }
printf("Found %d differences in gettimeofday:\n", NUMDIF); printf("%d calls to gettimeofday were required\n", numcalls); for (i = 0; i < NUMDIF; i++) { printf("%2d: %10ld microseconds\n", i, timedif[i]); sum += timedif[i]; } printf("The average nonzero difference is %f\n", sum/(double)NUMDIF); return 0;
Found 20 differences in gettimeofday: 31 calls to gettimeofday were required 0: 1 microseconds ... 19: 1 microseconds The average nonzero difference is 1.000000 Found 20 differences in gettimeofday: 32 calls to gettimeofday were required 0: 1 microseconds ... 19: 1 microseconds The average nonzero difference is 1.000000 Found 20 differences in gettimeofday: 33 calls to gettimeofday were required 0: 1 microseconds ... 19: 1 microseconds The average nonzero difference is 1.000000 Found 20 differences in gettimeofday: 31 calls to gettimeofday were required 0: 1 microseconds ... 19: 1 microseconds The average nonzero difference is 1.000000
double MHZ; int sleep_time = 10; start_counter(); sleep(sleep_time); MHZ = get_counter()/(sleep_time * 1e6);
int cnt = 1; double cmeas = 0; double cycles; do { int c = cnt; P(); /* Warm up cache */ get_counter(); while (c-- > 0) P(); cmeas = get_counter(); cycles = cmeas / cnt; cnt += cnt; } while (cmeas < CMIN); /* Make sure have enough */ return cycles / (1e6 * MHZ);
#include <stdio.h> #include <time.h> void function_to_time(void); int main(void) { time_t tstart; tstart = time(NULL); function_to_time(); printf("function_to_time took %f seconds of elapsed time\n", difftime(time(NULL), tstart)); return 0; }
#define ROWS 2048 #define COLUMNS 5 int bigArray[ROWS][COLUMNS]; void initArray(){ int i,j; for (i=0;i<ROWS;i++){ for (j=0;j<COLUMNS;j++){ bigArray[i][j]=0; } } } unsigned long long int rdtsc(void) { unsigned long long int x; __asm__ volatile(".byte 0x0f,0x31" : "=A" (x)); return x; }
main(int argc, char **argv){ unsigned long long llstart,llend; int i,stop; llstart=rdtsc(); llstart=rdtsc(); stop=atoi(argv[1]); for (i=0;i<stop;i++){ llstart=rdtsc(); initArray(); llend=rdtsc(); printf("Call %d: <%llu>\n",i,llend-llstart); } }
Call 0: <228648> Call 1: <147844> Call 2: <147556> Call 3: <146964> Call 4: <147036> Call 5: <147036> Call 6: <147036> Call 7: <147036> Call 8: <147036> Call 9: <147036> Call 0: <232984> Call 1: <155088> Call 2: <154576> Call 3: <154468> Call 4: <154412> Call 5: <154412> Call 6: <154412> Call 7: <154436> Call 8: <154416> Call 9: <154416> Call 0: <231352> Call 1: <155276> Call 2: <157252> Call 3: <154528> Call 4: <154528> Call 5: <154592> Call 6: <154552> Call 7: <154528> Call 8: <154528> Call 9: <154528>
main(int argc, char **argv){ unsigned long long llstart,llend, total; int i,stop; llstart=rdtsc(); llstart=rdtsc(); initArray(); /* Warm up the cache */ stop=atoi(argv[1]); total=0; for (i=0;i<stop;i++){ llstart=rdtsc(); initArray(); llend=rdtsc(); total=total+llend-llstart; /* Get an average Watch out for overflow */ } printf("Avg. over %d runs: <%llu>\n", stop,total/(long long)stop); }