Core，又称Core Dump文件，是Unix/Linux操作系统的一种机制。对于线上服务而言，Core令人闻之色变，因为出Core的过程意味着服务暂时不能正常响应，需要恢复；并且吐Core进程占用的内存空间越大，此过程持续的时间就越长（例如当进程占用60G以上内存时，完整Core文件需要15分钟才能完全写到磁盘上），这期间产生的流量损失不可估量。

凡事皆有两面性，OS在出Core的同时，虽然会终止掉当前进程，但是也会保留下第一手的现场数据，OS仿佛是一架被按下快门的相机，而照片就是产出的Core文件。里面含有当进程被终止时内存、CPU寄存器等信息，可以供后续开发人员进行调试。

产生Core的原因有很多。比如过去一些Unix的版本不支持现代Linux上这种GDB直接附着到进程上进行调试的机制，需要先向进程发送终止信号，然后用工具阅读core文件。在Linux上，我们可以使用kill向一个指定的进程发送信号，使其主动出Core并退出；也可以使用gcore命令，在不终止进程的情况下为其生成Core文件。从浅层次的原因上讲，出Core意味着当前进程存在BUG，需要程序员修复。从深层次的原因上讲，是当前进程触犯了某些OS层级的保护机制，逼迫OS向当前进程发送诸如SIGSEGV（即signal 11）之类的信号。例如访问空指针或数组越界出Core，实际上是触犯了OS的内存管理，访问了不属于当前进程的内存空间，OS需要通过出Core来进行警示。这就好像一个人体内存在病毒时，免疫系统会通过发烧来警示，是一个道理（有意思的是，并不是每次数组越界都会出Core，这和OS的内存管理中虚拟页面分配大小和边界有关；即使不出Core，也很有可能读到脏数据，引起后续程序行为紊乱，这是一种很难追查的BUG）。

修改core文件名格式
修改/proc/sys/kernel/core_pattern文件。此文件用于控制所生成Core文件的文件名，默认情况下，此文件只有一行内容：“core”。此文件支持定制，一般使用%配合不同的字符作为占位符，这里罗列几种：
- %p 出Core进程的PID
- %u 出Core进程的UID
- %s 造成Core的signal号
- %t 出Core的时间，从1970-01-01 00:00:00开始的秒数
- %e 出Core进程对应的可执行文件名

不能直接修改，需要通过下面的方法：
a. vim /etc/sysctl.conf，在最后一行添加 kernel.core_uses_pid = 1（在Core文件名后追加PID；若要自定义文件名格式，可同样添加形如 kernel.core_pattern = core-%e-%p-%t 的配置）
b. 执行sysctl -p

修改core文件大小

查看core文件的大小
ulimit -a
修改core文件的大小
ulimit -c <大小>（例如 ulimit -c unlimited 表示不限制core文件大小）

文件格式
core文件是ELF格式，可以通过 readelf -h命令查看
[img01]

像bmp、exe等文件一样，ELF的文件头包含整个文件的控制结构。它的定义如下

/*
 * 32-bit ELF file header: the control structure for the whole file.
 * In the field comments, "PH" stands for program header and "SH" for
 * section header.
 */
typedef struct elf32_hdr {
	unsigned char	e_ident[EI_NIDENT];	/* identification bytes */
	Elf32_Half	e_type;			/* file type */
	Elf32_Half	e_machine;		/* architecture */
	Elf32_Word	e_version;
	Elf32_Addr	e_entry;		/* entry point */
	Elf32_Off	e_phoff;		/* PH table offset */
	Elf32_Off	e_shoff;		/* SH table offset */
	Elf32_Word	e_flags;
	Elf32_Half	e_ehsize;		/* ELF header size in bytes */
	Elf32_Half	e_phentsize;		/* size of one PH entry */
	Elf32_Half	e_phnum;		/* number of PH entries */
	Elf32_Half	e_shentsize;		/* size of one SH entry */
	Elf32_Half	e_shnum;		/* number of SH entries */
	Elf32_Half	e_shstrndx;		/* SH name string table index */
} Elf32_Ehdr;

源码

coredump的入口函数是kernel/fs/exec.c中的do_coredump()。如果coredump生成失败，可以在do_coredump函数中增加打印来定位问题。do_coredump的源代码如下所示。

/*
 * do_coredump - write a core dump for the current task.
 * @signr:     signal number; stored in cprm.signr and passed to
 *             audit_core_dumps() and format_corename().
 * @exit_code: passed to coredump_wait().
 * @regs:      register state; stored in cprm.regs for the binfmt dumper.
 *
 * The dump goes either to a regular file or, when format_corename()
 * reports a pipe pattern, to a user-mode helper started through
 * call_usermodehelper_fns(). The actual writing is delegated to
 * binfmt->core_dump().
 *
 * Nothing is returned; every failure path simply unwinds through the
 * labels at the bottom. When a core file is unexpectedly missing, a printk
 * next to the relevant "goto fail*" branch shows which check rejected it.
 */
void do_coredump(long signr, int exit_code, struct pt_regs *regs)
{
	struct core_state core_state;
	char corename[CORENAME_MAX_SIZE + 1];
	struct mm_struct *mm = current->mm;
	struct linux_binfmt * binfmt;
	const struct cred *old_cred;
	struct cred *cred;
	int retval = 0;
	int flag = 0;
	int ispipe;
	/* Counts pipe dumps in flight; checked against core_pipe_limit below. */
	static atomic_t core_dump_count = ATOMIC_INIT(0);
	struct coredump_params cprm = {
		.signr = signr,
		.regs = regs,
		.limit = rlimit(RLIMIT_CORE),
		/*
		 * We must use the same mm->flags while dumping core to avoid
		 * inconsistency of bit flags, since this flag is not protected
		 * by any locks.
		 */
		.mm_flags = mm->flags,
	};
	audit_core_dumps(signr);
	binfmt = mm->binfmt;
	/*
	 * binfmt->core_dump is assigned at initialization depending on a
	 * kernel config macro; it is NULL when that macro is not enabled.
	 */
	if (!binfmt || !binfmt->core_dump)
		goto fail;
	/* A zero dumpable setting means no dump is produced. */
	if (!__get_dumpable(cprm.mm_flags))
		goto fail;
	cred = prepare_creds();
	if (!cred)
		goto fail;
	/*
	 *	We cannot trust fsuid as being the "true" uid of the
	 *	process nor do we know its entire history. We only know it
	 *	was tainted so we dump it as root in mode 2.
	 */
	if (__get_dumpable(cprm.mm_flags) == 2) {
		/* Setuid core dump mode */
		flag = O_EXCL;		/* Stop rewrite attacks */
		cred->fsuid = 0;	/* Dump root private */
	}
	retval = coredump_wait(exit_code, &core_state);
	if (retval < 0)
		goto fail_creds;
	/* From here on the (possibly modified) creds are in effect. */
	old_cred = override_creds(cred);
	/*
	 * Clear any false indication of pending signals that might
	 * be seen by the filesystem code called to write the core file.
	 */
	clear_thread_flag(TIF_SIGPENDING);
	/*
	 * Build the core file name from the pattern configured in
	 * /proc/sys/kernel/core_pattern; a non-zero result selects the
	 * pipe branch below.
	 */
	ispipe = format_corename(corename, signr);
 	if (ispipe) {
		int dump_count;
		char **helper_argv;
		if (cprm.limit == 1) {
			/*
			 * Normally core limits are irrelevant to pipes, since
			 * we're not writing to the file system, but we use
			 * cprm.limit of 1 here as a special value. Any
			 * non-1 limit gets set to RLIM_INFINITY below, but
			 * a limit of 0 skips the dump.  This is a consistent
			 * way to catch recursive crashes.  We can still crash
			 * if the core_pattern binary sets RLIM_CORE =  !1
			 * but it runs as root, and can do lots of stupid things
			 * Note that we use task_tgid_vnr here to grab the pid
			 * of the process group leader.  That way we get the
			 * right pid if a thread in a multi-threaded
			 * core_pattern process dies.
			 */
			printk(KERN_WARNING
				"Process %d(%s) has RLIMIT_CORE set to 1\n",
				task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Aborting core\n");
			goto fail_unlock;
		}
		cprm.limit = RLIM_INFINITY;
		/*
		 * Skip the dump when the number of pipe dumps in flight
		 * exceeds core_pipe_limit; a limit of 0 disables the check.
		 */
		dump_count = atomic_inc_return(&core_dump_count);
		if (core_pipe_limit && (core_pipe_limit < dump_count)) {
			printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
			       task_tgid_vnr(current), current->comm);
			printk(KERN_WARNING "Skipping core dump\n");
			goto fail_dropcount;
		}
		/*
		 * NOTE(review): corename+1 skips the first character,
		 * presumably the leading '|' of a pipe pattern - confirm
		 * against format_corename().
		 */
		helper_argv = argv_split(GFP_KERNEL, corename+1, NULL);
		if (!helper_argv) {
			printk(KERN_WARNING "%s failed to allocate memory\n",
			       __func__);
			goto fail_dropcount;
		}
		retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
					NULL, UMH_WAIT_EXEC, umh_pipe_setup,
					NULL, &cprm);
		argv_free(helper_argv);
		if (retval) {
 			printk(KERN_INFO "Core dump to %s pipe failed\n",
			       corename);
			goto close_fail;
 		}
	} else {
		struct inode *inode;
		
		/*
		 * The process's core size limit must be at least the binfmt's
		 * minimum core dump size (min_coredump; PAGE_SIZE according to
		 * the original note), otherwise no file is written.
		 */
		if (cprm.limit < binfmt->min_coredump)
			goto fail_unlock;
		/*
		 * NOTE(review): the literal 2 in the open flags is presumably
		 * O_RDWR - confirm.
		 */
		cprm.file = filp_open(corename,
				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
				 0600);
		if (IS_ERR(cprm.file))
			goto fail_unlock;
		inode = cprm.file->f_path.dentry->d_inode;
		/* Refuse a target that has more than one hard link. */
		if (inode->i_nlink > 1)
			goto close_fail;
		/* Refuse a target whose dentry is unhashed. */
		if (d_unhashed(cprm.file->f_path.dentry))
			goto close_fail;
		/*
		 * AK: actually i see no reason to not allow this for named
		 * pipes etc, but keep the previous behaviour for now.
		 */
		if (!S_ISREG(inode->i_mode))
			goto close_fail;
		/*
		 * Don't allow local users get cute and trick others to coredump
		 * into their pre-created files.
		 */
		if (inode->i_uid != current_fsuid())
			goto close_fail;
		/* The file must provide a write operation. */
		if (!cprm.file->f_op || !cprm.file->f_op->write)
			goto close_fail;
		/* Presumably truncates existing content to length 0 - confirm. */
		if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
			goto close_fail;
	}
	
	/*
	 * Run the binfmt's core_dump handler, which writes the registers and
	 * other state into the core file.
	 */
	retval = binfmt->core_dump(&cprm);
	/*
	 * NOTE(review): a non-zero return sets bit 0x80 in the group exit
	 * code, presumably the "core dumped" flag of the exit status - confirm.
	 */
	if (retval)
		current->signal->group_exit_code |= 0x80;
	if (ispipe && core_pipe_limit)
		wait_for_dump_helpers(cprm.file);
	/*
	 * Cleanup labels fall through in order: close the dump file, drop the
	 * pipe dump count, finish the coredump and restore the original
	 * creds, then release the prepared creds.
	 */
close_fail:
	if (cprm.file)
		filp_close(cprm.file, NULL);
fail_dropcount:
	if (ispipe)
		atomic_dec(&core_dump_count);
fail_unlock:
	coredump_finish(mm);
	revert_creds(old_cred);
fail_creds:
	put_cred(cred);
fail:
	return;
}

不要因为走得太远，就忘记为什么而出发

Linux上Core Dump文件的形成和分析

源码