Skip to content
Commits on Source (4)
plink1.9 (1.90~b6.10-190617-1) UNRELEASED; urgency=medium
* New upstream release.
* Bump debhelper compat 12.
-- Dylan Aïssi <daissi@debian.org> Fri, 13 Sep 2019 22:03:17 +0200
plink1.9 (1.90~b6.9-190304-2) unstable; urgency=medium
* Bump Standards-Version: 4.4.0 (no changes needed).
......
......@@ -3,7 +3,7 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Dylan Aïssi <daissi@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 11~),
Build-Depends: debhelper (>= 12~),
help2man,
libatlas-base-dev,
liblapack-dev,
......
# Copy/Paste from https://www.cog-genomics.org/plink/1.9/
4 Mar 2019: Brackets in command-line help text are now used in a manner more similar to other tools. --tests now works properly when a numeric argument is 1 past the end.
17 Jun 2019: --loop-assoc now works properly when the original .fam file has missing phenotype values.
4 Mar: Brackets in command-line help text are now used in a manner more similar to other tools. --tests now works properly when a numeric argument is 1 past the end.
15 Feb: Logistic regression no longer reports intercept beta values when it's supposed to report odds ratios. Scientific notation can now be used for command-line integer parameters.
......
This diff is collapsed.
......@@ -3193,478 +3193,6 @@ void normalize_phenos(double* new_phenos, uint32_t sample_ct, uintptr_t* sample_
}
}
/*
int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t max_pcs, FILE* bedfile, uintptr_t bed_offset, uint32_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uintptr_t sample_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* sex_nm, uintptr_t* sex_male, double* pheno_d, double missing_phenod, char* outname, char* outname_end, uint32_t hh_exists) {
FILE* outfile = nullptr;
FILE* evecfile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t* sample_include2 = nullptr;
uintptr_t* sample_male_include2 = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
uintptr_t sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
uintptr_t marker_uidx = 0;
uint32_t pc_ct = 0;
uint32_t pct = 1;
uint32_t is_eigenvec = 0; // GCTA .eigenvec format instead of SMARTPCA .evec?
// er, this external file requirement is silly, we should replicate the most
// common SMARTPCA options to avoid creating a usability headache (while
// still supporting SMARTPCA import for power users)
uint32_t chrom_end = 0;
uint32_t chrom_fo_idx = 0;
uint32_t regress_pcs_sex_specific = regress_pcs_modifier & REGRESS_PCS_SEX_SPECIFIC;
uint32_t regress_pcs_clip = regress_pcs_modifier & REGRESS_PCS_CLIP;
int32_t retval = 0;
char wbuf[32];
uintptr_t pc_ct_p1; // plus 1 to account for intercept
double* pc_matrix;
double* pc_orig_prod_sums; // pc_ct_p1 * pc_ct_p1, upper triangle filled
double* pc_prod_sums; // (X'X)^{-1}
double* x_prime_y; // X'Y
double* beta_vec; // (X'X)^{-1}X'Y
double* residual_vec;
uintptr_t marker_idx;
uintptr_t sample_uidx;
uintptr_t sample_idx;
uintptr_t* loadbuf_raw;
uintptr_t* loadbuf;
uintptr_t* ulptr;
uintptr_t* ulptr_end;
uint32_t* missing_cts;
char* bufptr;
char* id_buf;
uintptr_t cur_word;
uintptr_t ulii;
uintptr_t uljj;
uintptr_t ulkk;
uint32_t is_haploid;
uint32_t is_x;
uint32_t is_y;
uint32_t uii;
char* sample_id_ptr;
MATRIX_INVERT_BUF1_TYPE* inv_1d_buf;
double* dbl_2d_buf;
double dxx;
if (bigstack_alloc_ul(unfiltered_sample_ctv2, &loadbuf_raw) ||
bigstack_alloc_ul(sample_ctv2, &loadbuf) ||
bigstack_alloc_ui(sample_ct, &missing_cts) ||
bigstack_alloc_c(max_sample_id_len, &id_buf)) {
goto calc_regress_pcs_ret_NOMEM;
}
if (alloc_collapsed_haploid_filters(sample_exclude, sex_male, unfiltered_sample_ct, sample_ct, hh_exists, 0, &sample_include2, &sample_male_include2)) {
goto calc_regress_pcs_ret_NOMEM;
}
// try unaltered filename. If that fails and the unaltered filename didn't
// have an .evec or .eigenvec extension, then also try appending .evec and
// appending .eigenvec.
evecfile = fopen(evecname, "r");
if (!evecfile) {
ulii = strlen(evecname);
if (((ulii >= 5) && (!memcmp(".evec", &(evecname[ulii - 5]), 5))) || ((ulii >= 9) && (!memcmp(".eigenvec", &(evecname[ulii - 9]), 9)))) {
goto calc_regress_pcs_ret_OPEN_FAIL;
}
strcpy(&(evecname[ulii]), ".evec");
evecfile = fopen(evecname, "r");
if (!evecfile) {
strcpy(&(evecname[ulii]), ".eigenvec");
if (fopen_checked(evecname, "r", &evecfile)) {
goto calc_regress_pcs_ret_OPEN_FAIL;
}
}
}
g_textbuf[MAXLINELEN - 7] = ' ';
g_textbuf[MAXLINELEN - 1] = ' ';
if (!fgets(g_textbuf, MAXLINELEN - 6, evecfile)) {
if (feof(evecfile)) {
goto calc_regress_pcs_ret_INVALID_FORMAT_2G;
} else {
goto calc_regress_pcs_ret_READ_FAIL;
}
}
if (!g_textbuf[MAXLINELEN - 7]) {
logerrprint("Error: Excessively long line in .evec/.eigenvec file.\n");
goto calc_regress_pcs_ret_INVALID_FORMAT;
}
bufptr = skip_initial_spaces(g_textbuf);
if (no_more_tokens_kns(bufptr)) {
goto calc_regress_pcs_ret_INVALID_FORMAT_2G;
}
if (memcmp(bufptr, "#eigvals:", 9)) {
is_eigenvec = 1;
bufptr = next_token(bufptr);
}
bufptr = next_token(bufptr);
while ((!no_more_tokens_kns(bufptr)) && ((*bufptr == '-') || ((*bufptr >= '0') && (*bufptr <= '9')))) {
pc_ct++;
bufptr = next_token(bufptr);
}
if (!pc_ct) {
goto calc_regress_pcs_ret_INVALID_FORMAT_2G;
}
if (pc_ct > max_pcs) {
sprintf(g_logbuf, "%svec format detected. Regressing on %d PC%s (out of %d).\n", is_eigenvec? "GCTA .eigen" : "SMARTPCA .e", max_pcs, (max_pcs == 1)? "" : "s", pc_ct);
pc_ct = max_pcs;
} else {
sprintf(g_logbuf, "%svec format detected. Regressing on %d principal component%s.\n", is_eigenvec? "GCTA .eigen" : "SMARTPCA .e", pc_ct, (pc_ct == 1)? "" : "s");
}
logprintb();
pc_ct_p1 = pc_ct + 1;
if (bigstack_alloc_d(pc_ct_p1 * sample_ct, &pc_matrix) ||
bigstack_alloc_d(pc_ct_p1 * pc_ct_p1, &pc_orig_prod_sums) ||
bigstack_alloc_d(pc_ct_p1 * pc_ct_p1, &pc_prod_sums) ||
bigstack_alloc_d(pc_ct_p1, &x_prime_y) ||
bigstack_alloc_d(pc_ct_p1, &beta_vec) ||
bigstack_alloc_d(pc_ct_p1, &residual_vec) ||
bigstack_alloc_d(pc_ct_p1 * pc_ct_p1, &dbl_2d_buf)) {
goto calc_regress_pcs_ret_NOMEM;
}
inv_1d_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(pc_ct_p1 * MATRIX_INVERT_BUF1_ELEM_ALLOC);
if (!inv_1d_buf) {
goto calc_regress_pcs_ret_NOMEM;
}
if (is_eigenvec) {
sample_idx = 0;
while (1) {
// todo: validate, and perhaps permute, sample IDs
bufptr = next_token_mult(skip_initial_spaces(g_textbuf), 2);
for (uii = 0; uii < pc_ct; uii++) {
if (no_more_tokens_kns(bufptr)) {
goto calc_regress_pcs_ret_INVALID_FORMAT_2G;
}
if (scan_double(bufptr, &(pc_matrix[uii * sample_ct + sample_idx]))) {
goto calc_regress_pcs_ret_INVALID_FORMAT_2G;
}
bufptr = next_token(bufptr);
}
pc_matrix[pc_ct * sample_ct + sample_idx] = 1.0; // intercept
if (++sample_idx >= sample_ct) {
break;
}
if (!fgets(g_textbuf, MAXLINELEN, evecfile)) {
if (feof(evecfile)) {
sprintf(g_logbuf, "Error: Fewer %s in .eigenvec file than expected.\n", g_species_plural);
goto calc_regress_pcs_ret_INVALID_FORMAT_3;
} else {
goto calc_regress_pcs_ret_READ_FAIL;
}
}
}
} else {
for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
if (!fgets(g_textbuf, MAXLINELEN, evecfile)) {
if (feof(evecfile)) {
sprintf(g_logbuf, "Error: Fewer %s in .evec file than expected.\n", g_species_plural);
goto calc_regress_pcs_ret_INVALID_FORMAT_3;
} else {
goto calc_regress_pcs_ret_READ_FAIL;
}
}
bufptr = next_token(skip_initial_spaces(g_textbuf));
for (uii = 0; uii < pc_ct; uii++) {
if (no_more_tokens_kns(bufptr)) {
goto calc_regress_pcs_ret_INVALID_FORMAT_2G;
}
if (scan_double(bufptr, &(pc_matrix[uii * sample_ct + sample_idx]))) {
goto calc_regress_pcs_ret_INVALID_FORMAT_2G;
}
bufptr = next_token(bufptr);
}
pc_matrix[pc_ct * sample_ct + sample_idx] = 1.0;
}
}
if (fgets(g_textbuf, MAXLINELEN, evecfile)) {
if (!no_more_tokens_kns(skip_initial_spaces(g_textbuf))) {
sprintf(g_logbuf, "Error: More %s in .e%svec file than expected.\n", g_species_plural, is_eigenvec? "igen" : "");
goto calc_regress_pcs_ret_INVALID_FORMAT_3;
}
}
fclose_null(&evecfile);
// precalculate (X'X)
// er, check if there's a faster way to do this...
fill_double_zero(pc_ct_p1 * pc_ct_p1, pc_orig_prod_sums);
for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
for (ulii = 0; ulii < pc_ct_p1; ulii++) {
for (uljj = ulii; uljj < pc_ct_p1; uljj++) {
pc_orig_prod_sums[ulii * pc_ct_p1 + uljj] += pc_matrix[ulii * sample_ct + sample_idx] * pc_matrix[uljj * sample_ct + sample_idx];
}
}
}
fill_uint_zero(sample_ct, missing_cts);
refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_haploid);
// .gen instead of .bgen because latter actually has lower precision(!) (15
// bits instead of the ~20 you get from printf("%g", dxx)), and there's no
// need for repeated random access.
strcpy(outname_end, ".gen");
if (fopen_checked(outname, "w", &outfile)) {
goto calc_regress_pcs_ret_OPEN_FAIL;
}
if (fseeko(bedfile, bed_offset, SEEK_SET)) {
goto calc_regress_pcs_ret_READ_FAIL;
}
for (marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
if (IS_SET(marker_exclude, marker_uidx)) {
marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
if (fseeko(bedfile, bed_offset + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
goto calc_regress_pcs_ret_READ_FAIL;
}
}
if (marker_uidx >= chrom_end) {
chrom_fo_idx++;
refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_haploid);
}
if (load_and_collapse(unfiltered_sample_ct, sample_ct, sample_exclude, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, loadbuf)) {
goto calc_regress_pcs_ret_READ_FAIL;
}
if (is_haploid && hh_exists) {
haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf);
}
bufptr = chrom_name_write(chrom_info_ptr, get_variant_chrom(chrom_info_ptr, marker_uidx), g_textbuf);
*bufptr++ = ' ';
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
g_textbuf[0] = ' ';
bufptr = uint32toa_x(marker_pos[marker_uidx], ' ', &(g_textbuf[1]));
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(marker_allele_ptrs[2 * marker_uidx], outfile);
putc_unlocked(' ', outfile);
if (fputs_checked(marker_allele_ptrs[2 * marker_uidx + 1], outfile)) {
goto calc_regress_pcs_ret_WRITE_FAIL;
}
memcpy(pc_prod_sums, pc_orig_prod_sums, pc_ct_p1 * pc_ct_p1 * sizeof(double));
fill_double_zero(pc_ct_p1, x_prime_y);
sample_idx = 0;
sample_uidx = BITCT2; // repurposed as end-of-word
ulptr = loadbuf;
ulptr_end = &(loadbuf[sample_ct / BITCT2]);
while (1) {
while (ulptr < ulptr_end) {
cur_word = *loadbuf++;
for (; sample_idx < sample_uidx; sample_idx++) {
ulii = cur_word & 3;
if (ulii != 1) {
ulii = ulii - (ulii >> 1);
for (uljj = 0; uljj < pc_ct_p1; uljj++) {
x_prime_y[uljj] += pc_matrix[uljj * sample_ct + sample_idx] * ulii;
}
} else {
missing_cts[sample_idx] += 1;
for (uljj = 0; uljj < pc_ct_p1; uljj++) {
for (ulkk = uljj; ulkk < pc_ct_p1; ulkk++) {
pc_prod_sums[uljj * pc_ct_p1 + ulkk] -= pc_matrix[uljj * sample_ct + sample_idx] * pc_matrix[ulkk * sample_ct + sample_idx];
}
}
}
cur_word >>= 2;
}
sample_uidx += BITCT2;
}
if (sample_idx == sample_ct) {
break;
}
ulptr_end++;
sample_uidx = sample_ct;
}
// last-minute update of lower triangle
for (ulii = 1; ulii < pc_ct_p1; ulii++) {
for (uljj = 0; uljj < ulii; uljj++) {
pc_prod_sums[ulii * pc_ct_p1 + uljj] = pc_prod_sums[uljj * pc_ct_p1 + ulii];
}
}
invert_matrix(pc_ct_p1, pc_prod_sums, inv_1d_buf, dbl_2d_buf);
for (ulii = 0; ulii < pc_ct_p1; ulii++) {
dxx = 0.0;
for (uljj = 0; uljj < pc_ct_p1; uljj++) {
dxx += pc_prod_sums[ulii * pc_ct_p1 + uljj] * x_prime_y[uljj];
}
beta_vec[ulii] = dxx;
}
wbuf[0] = ' ';
sample_idx = 0;
sample_uidx = BITCT2;
ulptr = loadbuf;
ulptr_end = &(loadbuf[sample_ct / BITCT2]);
while (1) {
while (ulptr < ulptr_end) {
cur_word = *loadbuf++;
for (; sample_idx < sample_uidx; sample_idx++) {
ulii = cur_word & 3;
if (ulii == 1) {
fputs(" 0 0 0", outfile);
} else {
ulii = ulii - (ulii >> 1);
dxx = 0.0;
for (uljj = 0; uljj < pc_ct_p1; uljj++) {
dxx += pc_matrix[uljj * sample_ct + sample_idx] * beta_vec[uljj];
}
dxx = (double)((intptr_t)ulii) - dxx;
// now dxx is the residual, normally but not always in [0, 2]
if (dxx < 1.0) {
if (dxx < 0.0) {
if (regress_pcs_clip) {
fputs(" 1 0 0", outfile);
} else {
bufptr = dtoa_g(1.0 - dxx * 0.5, &(wbuf[1]));
bufptr = memcpyl3a(bufptr, " 0 ");
bufptr = dtoa_g(dxx * 0.5, bufptr);
fwrite(wbuf, 1, bufptr - wbuf, outfile);
}
} else {
bufptr = dtoa_gx(1.0 - dxx, ' ', &(wbuf[1]));
bufptr = dtoa_g(dxx, bufptr);
bufptr = memcpya(bufptr, " 0", 2);
fwrite(wbuf, 1, bufptr - wbuf, outfile);
}
} else {
if (dxx > 2.0) {
if (regress_pcs_clip) {
fputs(" 0 0 1", outfile);
} else {
bufptr = dtoa_g(1.0 - dxx * 0.5, &(wbuf[1]));
bufptr = memcpyl3a(bufptr, " 0 ");
bufptr = dtoa_g(dxx * 0.5, bufptr);
fwrite(wbuf, 1, bufptr - wbuf, outfile);
}
} else {
bufptr = memcpya(&(wbuf[1]), "0 ", 2);
bufptr = dtoa_gx(2.0 - dxx, ' ', bufptr);
bufptr = dtoa_g(dxx - 1.0, bufptr);
fwrite(wbuf, 1, bufptr - wbuf, outfile);
}
}
}
}
sample_uidx += BITCT2;
}
if (sample_idx == sample_ct) {
break;
}
ulptr_end++;
sample_uidx = sample_ct;
}
if (putc_checked('\n', outfile)) {
goto calc_regress_pcs_ret_WRITE_FAIL;
}
if (marker_idx * 100LLU >= ((uint64_t)pct * marker_ct)) {
pct = ((uint64_t)marker_idx * 100) / marker_ct;
printf("\r%d%%", pct++);
fflush(stdout);
}
}
if (fclose_null(&outfile)) {
goto calc_regress_pcs_ret_WRITE_FAIL;
}
strcpy(outname_end, ".sample");
if (fopen_checked(outname, "w", &outfile)) {
goto calc_regress_pcs_ret_OPEN_FAIL;
}
if (fputs_checked("ID_1 ID_2 missing sex phenotype\n0 0 0 D P\n", outfile)) {
goto calc_regress_pcs_ret_WRITE_FAIL;
}
// regress phenotype
fill_double_zero(pc_ct_p1, x_prime_y);
sample_uidx = 0;
for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
dxx = pheno_d[sample_uidx];
for (ulii = 0; ulii < pc_ct_p1; ulii++) {
x_prime_y[ulii] += pc_matrix[ulii * sample_ct + sample_idx] * dxx;
}
}
for (ulii = 1; ulii < pc_ct_p1; ulii++) {
for (uljj = 0; uljj < ulii; uljj++) {
pc_orig_prod_sums[ulii * pc_ct_p1 + uljj] = pc_orig_prod_sums[uljj * pc_ct_p1 + ulii];
}
}
invert_matrix(pc_ct_p1, pc_orig_prod_sums, inv_1d_buf, dbl_2d_buf);
for (ulii = 0; ulii < pc_ct_p1; ulii++) {
dxx = 0.0;
for (uljj = 0; uljj < pc_ct_p1; uljj++) {
dxx += pc_orig_prod_sums[ulii * pc_ct_p1 + uljj] * x_prime_y[uljj];
}
beta_vec[ulii] = dxx;
}
sample_uidx = 0;
for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
dxx = 0.0;
for (ulii = 0; ulii < pc_ct_p1; ulii++) {
dxx += pc_matrix[ulii * sample_ct + sample_idx] * beta_vec[ulii];
}
residual_vec[sample_idx] = pheno_d[sample_uidx] - dxx;
}
if (regress_pcs_modifier & REGRESS_PCS_NORMALIZE_PHENO) {
if (regress_pcs_sex_specific) {
normalize_phenos(residual_vec, sample_ct, sample_exclude, sex_nm, sex_male, 1);
normalize_phenos(residual_vec, sample_ct, sample_exclude, sex_nm, sex_male, 2);
normalize_phenos(residual_vec, sample_ct, sample_exclude, sex_nm, sex_male, 4);
} else {
normalize_phenos(residual_vec, sample_ct, sample_exclude, sex_nm, sex_male, 7);
}
}
sample_uidx = 0;
for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
sample_id_ptr = &(sample_ids[sample_uidx * max_sample_id_len]);
uii = strlen_se(sample_id_ptr);
// todo: adjust pheno_d, double-check missing gender behavior
fwrite(sample_id_ptr, 1, uii, outfile);
putc_unlocked(' ', outfile);
fputs(&(sample_id_ptr[uii + 1]), outfile);
g_textbuf[0] = ' ';
bufptr = dtoa_gx(((double)missing_cts[sample_uidx]) / (double)marker_ct, ' ', &(g_textbuf[1]));
*bufptr = sexchar(sex_nm, sex_male, sample_uidx);
bufptr[1] = ' ';
bufptr = dtoa_gx(residual_vec[sample_idx], '\n', &(bufptr[2]));
if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
goto calc_regress_pcs_ret_WRITE_FAIL;
}
}
if (fclose_null(&outfile)) {
goto calc_regress_pcs_ret_WRITE_FAIL;
}
*outname_end = '\0';
putc_unlocked('\r', stdout);
LOGPRINTF("Principal component regression residuals and %sphenotype Z-scores %s%s.gen and %s.sample.\n", regress_pcs_sex_specific? "sex-specific " : "", regress_pcs_sex_specific? "\nwritten to " : "written to\n", outname, outname);
bigstack_reset(bigstack_mark);
while (0) {
calc_regress_pcs_ret_NOMEM:
retval = RET_NOMEM;
break;
calc_regress_pcs_ret_OPEN_FAIL:
retval = RET_OPEN_FAIL;
break;
calc_regress_pcs_ret_READ_FAIL:
retval = RET_READ_FAIL;
break;
calc_regress_pcs_ret_WRITE_FAIL:
retval = RET_WRITE_FAIL;
break;
calc_regress_pcs_ret_INVALID_FORMAT_3:
logerrprintb();
retval = RET_INVALID_FORMAT;
break;
calc_regress_pcs_ret_INVALID_FORMAT_2G:
logerrprint("Error: Improperly formatted .evec file.\n");
calc_regress_pcs_ret_INVALID_FORMAT:
retval = RET_INVALID_FORMAT;
break;
}
fclose_cond(evecfile);
fclose_cond(outfile);
return retval;
}
*/
static uintptr_t g_cg_sample1idx;
static uintptr_t g_cg_sample2idx;
static uintptr_t g_cg_sample1uidx;
......
......@@ -55,8 +55,6 @@ int32_t ibs_test_calc(pthread_t* threads, char* read_dists_fname, uintptr_t unfi
int32_t groupdist_calc(pthread_t* threads, uint32_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t groupdist_iters, uint32_t groupdist_d, uint32_t pheno_nm_ct, uint32_t pheno_ctrl_ct, uintptr_t* pheno_nm, uintptr_t* pheno_c);
// int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t max_pcs, FILE* bedfile, uintptr_t bed_offset, uint32_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uintptr_t sample_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* sex_nm, uintptr_t* sex_male, double* pheno_d, double missing_phenod, char* outname, char* outname_end, uint32_t hh_exists);
int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uint32_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, double* set_allele_freqs, uint32_t* nchrobs, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uint32_t plink_maxfid, uint32_t plink_maxiid, uintptr_t max_sample_id_len, char* paternal_ids, uintptr_t max_paternal_id_len, char* maternal_ids, uintptr_t max_maternal_id_len, uintptr_t* founder_info, uint32_t parallel_idx, uint32_t parallel_tot, char* outname, char* outname_end, int32_t nonfounders, uint64_t calculation_type, uint32_t genome_modifier, uint32_t ppc_gap, double min_pi_hat, double max_pi_hat, uintptr_t* pheno_nm, uintptr_t* pheno_c, Pedigree_rel_info pri, uint32_t skip_write);
int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, char* outname_end, Rel_info* relip);
......
......@@ -313,13 +313,12 @@
#define RET_INVALID_CMDLINE 5
#define RET_WRITE_FAIL 6
#define RET_READ_FAIL 7
#define RET_HELP 8
#define RET_THREAD_CREATE_FAIL 9
#define RET_ALLELE_MISMATCH 10
#define RET_NULL_CALC 11
#define RET_ALL_SAMPLES_EXCLUDED 12
#define RET_ALL_MARKERS_EXCLUDED 13
#define RET_NETWORK 14
#define RET_THREAD_CREATE_FAIL 8
#define RET_ALLELE_MISMATCH 9
#define RET_NULL_CALC 10
#define RET_ALL_SAMPLES_EXCLUDED 11
#define RET_ALL_MARKERS_EXCLUDED 12
#define RET_NETWORK 13
#define LOAD_PHENO_LAST_COL 127
// for 2.0 -> 1.9 backports
......@@ -410,8 +409,8 @@
#define CALC_GENOME 0x1000LLU
#define CALC_REGRESS_REL 0x2000LLU
#define CALC_LD_PRUNE 0x4000LLU
#define CALC_REGRESS_PCS 0x8000LLU
#define CALC_REGRESS_PCS_DISTANCE 0x10000LLU
#define CALC_DFAM 0x8000LLU
#define CALC_TUCC 0x10000LLU
#define CALC_MAKE_BED 0x20000LLU
#define CALC_RECODE 0x40000LLU
#define CALC_MERGE 0x80000LLU
......@@ -453,8 +452,6 @@
#define CALC_WRITE_VAR_RANGES 0x80000000000000LLU
#define CALC_DUPVAR 0x100000000000000LLU
#define CALC_RPLUGIN 0x200000000000000LLU
#define CALC_DFAM 0x400000000000000LLU
#define CALC_TUCC 0x800000000000000LLU
#define CALC_ONLY_BIM (CALC_WRITE_SET | CALC_WRITE_SNPLIST | CALC_WRITE_VAR_RANGES | CALC_LIST_23_INDELS | CALC_MAKE_BIM | CALC_DUPVAR)
#define CALC_ONLY_FAM (CALC_MAKE_PERM_PHENO | CALC_WRITE_COVAR | CALC_MAKE_FAM)
// only room for 6 more basic commands before we need to switch from a single
......@@ -584,10 +581,6 @@
#define SAMPLE_SORT_ASCII 4
#define SAMPLE_SORT_FILE 8
#define REGRESS_PCS_NORMALIZE_PHENO 1
#define REGRESS_PCS_SEX_SPECIFIC 2
#define REGRESS_PCS_CLIP 4
#define HWE_MIDP 1
#define HWE_THRESH_MIDP 2
#define HWE_THRESH_ALL 4
......
......@@ -628,8 +628,8 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --indep-pairphase <window size>['kb'] <step size (variant ct)> <r^2 thresh>\n"
" Generate a list of markers in approximate linkage equilibrium. With the\n"
" 'kb' modifier, the window size is in kilobase instead of variant count\n"
" units. (Pre-'kb' space is optional, i.e. '--indep-pairwise 500 kb 5 0.5'\n"
" and '--indep-pairwise 500kb 5 0.5' have the same effect.)\n"
" units. (Pre-'kb' space is optional, i.e. \"--indep-pairwise 500 kb 5 0.5\"\n"
" and \"--indep-pairwise 500kb 5 0.5\" have the same effect.)\n"
" Note that you need to rerun " PROG_NAME_CAPS " using --extract or --exclude on the\n"
" .prune.in/.prune.out file to apply the list to another computation.\n\n"
);
......@@ -694,7 +694,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" the --hap... family of flags has not been reimplemented in PLINK 1.9 due to\n"
" poor phasing accuracy relative to other software; for now, we recommend\n"
" using BEAGLE instead of PLINK for case/control haplotype association\n"
" analysis. (You can use '--recode beagle' to export data to BEAGLE 3.3.)\n"
" analysis. (You can use \"--recode beagle\" to export data to BEAGLE 3.3.)\n"
" We apologize for the inconvenience, and plan to develop variants of the\n"
" --hap... flags which handle pre-phased data effectively.\n\n"
);
......@@ -735,8 +735,8 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
help_print("distance-matrix\tibs-matrix\tmatrix", &help_ctrl, 1,
" --distance-matrix\n"
" --ibs-matrix\n"
" These deprecated commands are equivalent to '--distance 1-ibs flat-missing\n"
" square' and '--distance ibs flat-missing square', respectively, except that\n"
" These deprecated commands are equivalent to \"--distance 1-ibs flat-missing\n"
" square\" and \"--distance ibs flat-missing square\", respectively, except that\n"
" they generate space- instead of tab-delimited text matrices.\n\n"
);
help_print("make-rel", &help_ctrl, 1,
......@@ -969,7 +969,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" derived from considering only members of one group to the regression\n"
" coefficient derived from considering only members of the other. By\n"
" default, the first covariate in the --covar file defines the groups; use\n"
" e.g. '--gxe 3' to base them on the third covariate instead.\n\n"
" e.g. \"--gxe 3\" to base them on the third covariate instead.\n\n"
);
help_print("linear\tlogistic\tperm\tmperm\tperm-count\tset-test\tgenotypic\thethom\tdominant\trecessive\tno-snp\thide-covar\tsex\tno-x-sex\tinteraction\tstandard-beta\tbeta", &help_ctrl, 1,
#ifndef NOLAPACK
......@@ -1297,31 +1297,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" have tried to execute are logged to a file.)\n\n"
);
#endif
/*
help_print("regress-pcs\tregress-pcs-distance", &help_ctrl, 1,
" --regress-pcs <.evec or .eigenvec filename> ['normalize-pheno']\n"
" ['sex-specific'] ['clip'] [max PCs]\n"
" Linear regression of phenotypes and genotypes on the given list of\n"
" principal components (produced by SMARTPCA or GCTA). Output is currently a\n"
" .gen + .sample fileset in the Oxford IMPUTE/SNPTEST v2 format.\n"
" * The 'normalize-pheno' modifier converts all phenotype residuals to\n"
" Z-scores. When combined with 'sex-specific', the Z-scores are evaluated\n"
" separately by sex.\n"
" * The 'clip' modifier clips out-of-range genotype residuals. Without it,\n"
" they are represented as negative probabilities in the .gen file, which\n"
" are invalid input for some programs.\n"
" * By default, principal components beyond the 20th are ignored; change this\n"
" by setting the max PCs parameter.\n\n"
);
help_print("regress-pcs\tdistance\tregress-pcs-distance", &help_ctrl, 1,
" --regress-pcs-distance <.evec/.eigenvec file> ['normalize-pheno']\n"
" ['sex-specific'] [max PCs]\n"
" [{square | square0 | triangle}] [{gz | bin}] ['ibs']\n"
" ['1-ibs'] ['allele-ct'] ['flat-missing']\n"
" High-speed combination of --regress-pcs and --distance (no .gen + .sample\n"
" fileset is written to disk).\n\n"
);
*/
#ifndef STABLE_BUILD
help_print("cnv-make-map", &help_ctrl, 1,
" --cnv-make-map ['short']\n"
......@@ -1447,7 +1422,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" Given diploid autosomes, the remaining modifiers indicate the absence of\n"
" the named non-autosomal chromosomes.\n"
" --cow/--dog/--horse/--mouse/--rice/--sheep : Shortcuts for those species.\n"
" --autosome-num <value> : Alias for '--chr-set <value> no-y no-xy no-mt'.\n"
" --autosome-num <value> : Alias for \"--chr-set <value> no-y no-xy no-mt\".\n"
);
help_print("cm-map\tzero-cms\tupdate-cm", &help_ctrl, 0,
" --cm-map <fname pattern> [chr] : Use SHAPEIT-format recombination maps to set\n"
......@@ -1565,7 +1540,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" either missing from the file or don't have any of\n"
" the listed attributes. If some attribute names in\n"
" the list are preceded by '-', they are treated as\n"
" 'negative match conditions' instead: variants with\n"
" \"negative match conditions\" instead: variants with\n"
" at least one negative match attribute are removed.\n"
" The first character in the list cannot be a '-', due\n"
" to how command-line parsing works; add a comma in\n"
......@@ -1576,7 +1551,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" Valid choices for humans are 0 (unplaced), 1-22, X, Y, XY,\n"
" and MT. Separate multiple chromosomes with spaces and/or\n"
" commas, and use a dash (no adjacent spaces permitted) to\n"
" denote a range, e.g. '--chr 1-4, 22, xy'.\n"
" denote a range, e.g. \"--chr 1-4, 22, xy\".\n"
" --not-chr <...> : Reverse of --chr (exclude variants on listed chromosomes).\n"
);
help_print("autosome\tautosome-xy\tchr\tnot-chr\tchr-excl", &help_ctrl, 0,
......@@ -1605,7 +1580,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
);
help_print("snps\texclude-snps", &help_ctrl, 0,
" --snps <var IDs...> : Use IDs to specify variant range(s) to load or\n"
" --exclude-snps <...> exclude. E.g. '--snps rs1111-rs2222, rs3333, rs4444'.\n"
" --exclude-snps <...> exclude. E.g. \"--snps rs1111-rs2222, rs3333, rs4444\".\n"
);
help_print("thin\tthin-count", &help_ctrl, 0,
" --thin <p> : Randomly remove variants, retaining each with prob. p.\n"
......@@ -1887,7 +1862,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
help_print("blocks\tblocks-max-kb\tblocks-min-maf\tblocks-strong-lowci\tblocks-strong-highci\tblocks-recomb-highci\tblocks-inform-frac\tld-window-kb", &help_ctrl, 0,
" --blocks-max-kb <kbs> : Set --blocks maximum haploblock span (def. 200).\n"
" --blocks-min-maf <cutoff> : Adjust --blocks MAF minimum (default 0.05).\n"
" --blocks-strong-lowci <x> : Set --blocks 'strong LD' CI thresholds (defaults\n"
" --blocks-strong-lowci <x> : Set --blocks \"strong LD\" CI thresholds (defaults\n"
" --blocks-strong-highci <x> 0.70 and 0.98).\n"
" --blocks-recomb-highci <x> : Set 'recombination' CI threshold (default 0.90).\n"
" --blocks-inform-frac <x> : Force haploblock <strong LD pairs>:<total\n"
......@@ -1920,7 +1895,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
help_print("homozyg\thomozyg-match\tpool-size", &help_ctrl, 0,
" --homozyg-match <> : Set minimum concordance across jointly homozygous\n"
" variants for a pairwise allelic match to be declared.\n"
" --pool-size <ct> : Set minimum size of pools in '--homozyg group' report.\n"
" --pool-size <ct> : Set minimum size of pools in \"--homozyg group\" report.\n"
);
help_print("read-genome\tcluster\tneighbour\tneighbor", &help_ctrl, 0,
" --read-genome <fn> : Load --genome report for --cluster/--neighbour, instead\n"
......@@ -1941,9 +1916,9 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --match-type <f> : Refine interpretation of --match file. The --match-type\n"
" file is expected to be a single line with as many entries\n"
" as the --match file has covariates; '0' entries specify\n"
" 'negative matches' (i.e. samples with equal covariate\n"
" \"negative matches\" (i.e. samples with equal covariate\n"
" values cannot be in the same cluster), '1' entries specify\n"
" 'positive matches' (default), and '-1' causes the\n"
" \"positive matches\" (default), and '-1' causes the\n"
" corresponding covariate to be ignored.\n"
" --qmatch <f> [m] : Force all members of a cluster to have similar\n"
" --qt <fname> quantitative covariate values. The --qmatch file contains\n"
......@@ -1987,7 +1962,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" * Note that, when --parameters is also present, the\n"
" indices refer to the terms remaining AFTER pruning by\n"
" --parameters.\n"
" * You can use '--tests all' to include all terms.\n"
" * You can use \"--tests all\" to include all terms.\n"
);
help_print("linear\tdosage\tvif", &help_ctrl, 0,
" --vif <max VIF> : Set VIF threshold for --linear multicollinearity check\n"
......@@ -2113,7 +2088,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" 'SNP'). Only relevant with --extract.\n"
);
help_print("fast-epistasis\tepistasis\tgap\tepi1\tepi2", &help_ctrl, 0,
" --gap <kbs> : Set '--fast-epistasis case-only' min. gap (default 1000).\n"
" --gap <kbs> : Set \"--fast-epistasis case-only\" min. gap (default 1000).\n"
" --epi1 <p-value> : Set --{,fast-}epistasis reporting threshold (default\n"
" 5e-6 for 'boost', 1e-4 otherwise).\n"
" --epi2 <p-value> : Set threshold for contributing to SIG_E count (def. 0.01).\n"
......
......@@ -582,8 +582,9 @@ int32_t apply_cm_map(char* cm_map_fname, char* cm_map_chrname, uintptr_t unfilte
goto apply_cm_map_ret_INVALID_FORMAT_2;
}
if (bp_new <= bp_old) {
logerrprint("Error: bp coordinates in --cm-map file are not in increasing order.\n");
goto apply_cm_map_ret_INVALID_FORMAT;
sprintf(g_logbuf, "Error: bp coordinates in --cm-map file are not in increasing order ('%d' on line %" PRIuPTR " is not larger than previous value '%d').\n", bp_new, line_idx, bp_old);
wordwrapb(0);
goto apply_cm_map_ret_INVALID_FORMAT_2;
}
bufptr2 = next_token_mult(bufptr, 2);
if (no_more_tokens_kns(bufptr2)) {
......@@ -642,7 +643,6 @@ int32_t apply_cm_map(char* cm_map_fname, char* cm_map_chrname, uintptr_t unfilte
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --cm-map file has fewer tokens than expected.\n", line_idx);
apply_cm_map_ret_INVALID_FORMAT_2:
logerrprintb();
apply_cm_map_ret_INVALID_FORMAT:
retval = RET_INVALID_FORMAT;
break;
}
......
......@@ -41,9 +41,9 @@ char pathbuf[FNAMESIZE * 2 + 128];
void disp_usage(FILE* stream) {
fputs(
" prettify {flag(s)...} [input filename] {output filename}\n\n"
" prettify [flag(s)...] <input filename> [output filename]\n\n"
" -i, --inplace : Replace the input instead of writing to a new file.\n"
" -s, --spacing [ct] : Set number of spaces between columns (default 2).\n"
" -s, --spacing <ct> : Set number of spaces between columns (default 2).\n"
" -r, --ralign : Make right sides of columns line up, instead of left.\n"
" -l, --leading : Add space(s) before the first column.\n"
" -e, --extend-short : Use spaces to extend lines with fewer columns.\n"
......@@ -55,7 +55,7 @@ void disp_usage(FILE* stream) {
, stream);
}
void dispmsg(retval) {
void dispmsg(int32_t retval) {
switch (retval) {
case RET_NOMEM:
fputs("\nError: Out of memory.\n", stderr);
......@@ -184,7 +184,7 @@ int32_t scan_column_widths(FILE* infile, uintptr_t column_sep, uintptr_t** col_w
}
if (++cur_col_idx == max_col_ct) {
malloc_size *= 2;
new_col_widths = realloc(col_widths, malloc_size);
new_col_widths = (uintptr_t*)realloc(col_widths, malloc_size);
if (!new_col_widths) {
goto scan_column_widths_ret_READ_FAIL;
}
......@@ -602,7 +602,7 @@ int32_t main(int32_t argc, char** argv) {
while (0) {
main_ret_HELP:
fputs(
"prettify v1.04 (21 Feb 2014) Christopher Chang (chrchang@alumni.caltech.edu)\n\n"
"prettify v1.05 (5 Mar 2019) Christopher Chang (chrchang@alumni.caltech.edu)\n\n"
"Takes a tab-and/or-space-delimited text table, and generates a space-delimited\n"
"pretty-printed version. Multibyte character encodings are not currently\n"
"supported.\n\n"
......@@ -611,10 +611,10 @@ int32_t main(int32_t argc, char** argv) {
fputs(
"\nTo perform the simplest reverse conversion (multiple spaces to one tab), you\n"
"can use\n"
" cat [input filename] | tr -s ' ' '\\t' > [output filename]\n"
" cat <input filename> | tr -s ' ' '\\t' > <output filename>\n"
"For one-to-one conversion between spaces and tabs instead, omit the \"-s\". And\n"
"to strip leading and trailing tabs and spaces, try\n"
" cat [in] | sed 's/^[[:space:]]*//g' | sed 's/[[:space:]]*$//g' > [out]\n"
" cat <in> | sed 's/^[[:space:]]*//g' | sed 's/[[:space:]]*$//g' > <out>\n"
, stdout);
retval = RET_HELP;
break;
......