##How atomic variables work
Intel的x86和x86_64架构当访问内存时,有锁住前端总线(FSB)的指令,前端总线是处理器和RAM通信的通道,锁住FSB可以阻止其它处理器或进程在当前处理器运行,也可以阻止访问RAM。
##原子变量的字节数限制
Intel的专家建议并不用每次访问内存都要锁住FSB,Intel处理器允许memcpy() and memcmp()允许在一个处理器上执行,但锁住大块内存的代价太大。实际使用中,当访问1, 2, 4, 8字节的整数时可以锁住FSB,gcc支持int、long、long long的原子操作。
##什么情况下不能使用原子变量
##gcc支持的原子操作函数
使用原子操作的例子
int global_int = 0;
pid_t gettid( void )
{
return syscall( __NR_gettid );
}
void *thread_routine( void *arg )
{
int i;
int proc_num = (int)(long)arg;
cpu_set_t set;
CPU_ZERO( &set );
CPU_SET( proc_num, &set );
if (sched_setaffinity( gettid(), sizeof( cpu_set_t ), &set ))
{
perror( "sched_setaffinity" );
return NULL;
}
for (i = 0; i < INC_TO; i++)
{
global_int++;
//__sync_fetch_and_add( &global_int, 1 );
}
return NULL;
}
int main()
{
int procs = 0;
int i;
pthread_t *thrs;
// Getting number of CPUs
procs = (int)sysconf( _SC_NPROCESSORS_ONLN );
if (procs < 0)
{
perror( "sysconf" );
return -1;
}
else
{
std::cout << "cpu nums:" << procs << std::endl;
}
thrs = (pthread_t*)malloc( sizeof( pthread_t ) * procs );
if (thrs == NULL)
{
perror( "malloc" );
return -1;
}
printf( "Starting %d threads...\n", procs );
for (i = 0; i < procs; i++)
{
if (pthread_create( &thrs[i], NULL, thread_routine,
(void *)(long)i ))
{
perror( "pthread_create" );
procs = i;
break;
}
}
for (i = 0; i < procs; i++)
pthread_join( thrs[i], NULL );
free( thrs );
printf( "After doing all the math, global_int value is: %d\n",
global_int );
printf( "Expected value is: %d\n", INC_TO * procs );
return 0;
}
平均耗时90ms
同样的逻辑使用互斥锁的例子
int global_int = 0;
pthread_mutex_t mutex;
pid_t gettid( void )
{
return syscall( __NR_gettid );
}
uint64_t get_time()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec*1000 + tv.tv_usec/1000;
}
void *thread_routine( void *arg )
{
int i;
int proc_num = (int)(long)arg;
cpu_set_t set;
CPU_ZERO( &set );
CPU_SET( proc_num, &set );
if (sched_setaffinity( gettid(), sizeof( cpu_set_t ), &set ))
{
perror( "sched_setaffinity" );
return NULL;
}
for (i = 0; i < INC_TO; i++)
{
pthread_mutex_lock(&mutex);
global_int++;
pthread_mutex_unlock(&mutex);
}
return NULL;
}
int main()
{
int procs = 0;
int i;
pthread_t *thrs;
// Getting number of CPUs
procs = (int)sysconf( _SC_NPROCESSORS_ONLN );
if (procs < 0)
{
perror( "sysconf" );
return -1;
}
else
{
std::cout << "cpu nums:" << procs << std::endl;
}
thrs = (pthread_t*)malloc( sizeof( pthread_t ) * procs );
if (thrs == NULL)
{
perror( "malloc" );
return -1;
}
printf( "Starting %d threads...\n", procs );
uint64_t begin_ts = get_time();
pthread_mutex_init(&mutex, NULL);
for (i = 0; i < procs; i++)
{
if (pthread_create( &thrs[i], NULL, thread_routine,
(void *)(long)i ))
{
perror( "pthread_create" );
procs = i;
break;
}
}
for (i = 0; i < procs; i++)
pthread_join( thrs[i], NULL );
pthread_mutex_destroy(&mutex);
uint64_t end_ts = get_time();
printf("time costs:%lld\n", (long long int)end_ts-begin_ts);
free( thrs );
printf( "After doing all the math, global_int value is: %d\n",
global_int );
printf( "Expected value is: %d\n", INC_TO * procs );
return 0;
}
平均耗时400ms,可以看出原子操作相比互斥锁有很大的性能优势.