|
#include "samtools.pysam.h" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <config.h> |
|
|
|
#include <stdio.h> |
|
#include <ctype.h> |
|
#include <errno.h> |
|
#include "bam.h" |
|
#include "htslib/kstring.h" |
|
|
|
|
|
/*
 * Return the library (LB) name of the read group that record `b` belongs to.
 *
 * The record's "RG" aux tag is used as the ID to look up the matching @RG
 * header line in `h`, and that line's LB value is copied out.
 *
 * Returns NULL when the record has no string-typed RG tag or when the
 * header lookup fails.  Otherwise returns a pointer to a function-local
 * static buffer: the text is truncated to fit that buffer, is overwritten
 * by the next call, and must not be freed by the caller.
 */
const char *bam_get_library(sam_hdr_t *h, const bam1_t *b)
{
    static char LB_text[1024];
    kstring_t lib = { 0, 0, NULL };
    const char *rg = (const char *)bam_aux_get(b, "RG");
    size_t len;

    /*
     * The aux pointer starts with a one-byte type code.  Only a 'Z' value is
     * followed by NUL-terminated text, so anything else must not be handed
     * on as a C string.
     */
    if (!rg || *rg != 'Z')
        return NULL;
    rg++;

    if (sam_hdr_find_tag_id(h, "RG", "ID", rg, "LB", &lib) < 0) {
        free(lib.s);            /* no-op when nothing was allocated */
        return NULL;
    }

    /* Copy as much as fits, always leaving room for the terminator. */
    len = lib.l < sizeof(LB_text) - 1 ? lib.l : sizeof(LB_text) - 1;
    if (len > 0)
        memcpy(LB_text, lib.s, len);
    LB_text[len] = 0;

    free(lib.s);

    return LB_text;
}
|
|
|
|
|
|
|
|
|
|
|
/*
 * Store the 4-bit code `c` as element `i` of the nibble-packed array `s`.
 * Even indices occupy the high nibble of byte i/2 and odd indices the low
 * nibble; the other nibble of that byte is left untouched.
 * Note: `s` and `i` are evaluated more than once.
 */
#define bam1_seq_seti(s, i, c) \
    ( (s)[(i) >> 1] = (((s)[(i) >> 1] & (0xf << (((i) & 1) << 2))) \
                       | ((c) << ((~(i) & 1) << 2))) )
|
|
|
/*
 * Rewrite record `b` in place so that its CIGAR no longer contains any
 * BAM_CBACK ("B") operation.
 *
 * A B operation of length `len` steps the write position in the query back
 * by `len` bases.  The query-consuming operations that follow are then
 * merged onto the bases already written there: where the two bases agree
 * the larger quality is kept; where they differ the base with the larger
 * quality wins and the new quality is the difference of the two.  The
 * CIGAR, sequence, quality and aux data are then compacted and the record
 * lengths updated.
 *
 * Returns 0 on success, including the cases where the record is flagged
 * unmapped or has no B operation (the record is then left untouched).
 * Returns -1 and sets BAM_FUNMAP when the CIGAR starts with B or a B steps
 * back further than the bases written so far.
 * Returns -1 without modifying the record when the scratch space for the
 * new CIGAR cannot be allocated.
 */
int bam_remove_B(bam1_t *b)
{
    int i, j, end_j, k, l, no_qual;
    uint32_t *cigar, *new_cigar;
    uint8_t *seq, *qual, *p;

    /* Nothing to do for unmapped reads or CIGARs without a B operation. */
    if (b->core.flag & BAM_FUNMAP) return 0;
    cigar = bam_get_cigar(b);
    for (k = 0; k < b->core.n_cigar; ++k)
        if (bam_cigar_op(cigar[k]) == BAM_CBACK) break;
    if (k == b->core.n_cigar) return 0;
    if (bam_cigar_op(cigar[0]) == BAM_CBACK) goto rmB_err;

    /* The new CIGAR is built in scratch space at the tail of b->data. */
    if (b->l_data + (b->core.n_cigar + 1) * 4 > b->m_data) {
        uint32_t new_size = b->l_data + b->core.n_cigar * 4;
        uint8_t *new_data;

        kroundup32(new_size);
        /* Commit the new buffer only once the allocation has succeeded. */
        new_data = (uint8_t*)realloc(b->data, new_size);
        if (new_data == NULL) return -1;
        b->data = new_data;
        b->m_data = new_size;
        cigar = bam_get_cigar(b); /* b->data may have moved */
    }
    new_cigar = (uint32_t*)(b->data + (b->m_data - b->core.n_cigar * 4));

    seq = bam_get_seq(b); qual = bam_get_qual(b);
    no_qual = (qual[0] == 0xff);
    /* i: read position in the old query; j: write position in the new one;
     * end_j: value of j just before the most recent step back. */
    i = j = 0; end_j = -1;
    for (k = l = 0; k < b->core.n_cigar; ++k) {
        int op  = bam_cigar_op(cigar[k]);
        int len = bam_cigar_oplen(cigar[k]);
        if (op == BAM_CBACK) {
            int t, u;
            if (k == b->core.n_cigar - 1) break; /* a trailing B is dropped */
            if (len > j) goto rmB_err;           /* steps back past the start */
            /* Walk the new CIGAR backwards, taking `len` query bases off it. */
            for (t = l - 1, u = 0; t >= 0; --t) {
                int op1  = bam_cigar_op(new_cigar[t]);
                int len1 = bam_cigar_oplen(new_cigar[t]);
                if (bam_cigar_type(op1)&1) { /* op consumes query bases */
                    if (u + len1 >= len) {
                        new_cigar[t] -= (len - u) << BAM_CIGAR_SHIFT;
                        break;
                    } else u += len1;
                }
            }
            if (bam_cigar_oplen(new_cigar[t]) == 0) --t; /* drop an emptied op */
            l = t + 1;
            end_j = j; j -= len;
        } else {
            new_cigar[l++] = cigar[k];
            if (bam_cigar_type(op)&1) { /* op consumes query bases */
                if (i != j) { /* positions diverged: bases must be moved or merged */
                    int u, c, c0;
                    for (u = 0; u < len; ++u) {
                        c = bam_seqi(seq, i+u);
                        if (j + u < end_j) { /* overlaps bases already written */
                            c0 = bam_seqi(seq, j+u);
                            if (c != c0) { /* disagreement: higher quality wins */
                                if (qual[j+u] < qual[i+u]) {
                                    bam1_seq_seti(seq, j+u, c);
                                    qual[j+u] = qual[i+u] - qual[j+u];
                                } else qual[j+u] -= qual[i+u];
                            } else qual[j+u] = qual[j+u] > qual[i+u]? qual[j+u] : qual[i+u];
                        } else { /* no overlap: plain copy */
                            bam1_seq_seti(seq, j+u, c);
                            qual[j+u] = qual[i+u];
                        }
                    }
                }
                i += len, j += len;
            }
        }
    }
    if (no_qual) qual[0] = 0xff; /* restore the first-byte 0xff marker seen on entry */

    /* Fold each op into a following op of the same type; the earlier op is
     * left with length zero. */
    for (k = 1; k < l; ++k)
        if (bam_cigar_op(new_cigar[k]) == bam_cigar_op(new_cigar[k-1]))
            new_cigar[k] += new_cigar[k-1] >> BAM_CIGAR_SHIFT << BAM_CIGAR_SHIFT, new_cigar[k-1] &= 0xf;

    /* Squeeze out the zero-length ops. */
    for (k = i = 0; k < l; ++k)
        if (new_cigar[k] >> BAM_CIGAR_SHIFT)
            new_cigar[i++] = new_cigar[k];
    l = i;

    /* Repack the record: CIGAR, sequence, qualities, aux.  The core fields
     * are updated last because bam_get_aux()/bam_get_l_aux() still need the
     * old layout. */
    memcpy(cigar, new_cigar, l * 4);
    p = b->data + b->core.l_qname + l * 4;
    memmove(p, seq, (j+1)>>1); p += (j+1)>>1;
    memmove(p, qual, j); p += j;
    memmove(p, bam_get_aux(b), bam_get_l_aux(b)); p += bam_get_l_aux(b);
    b->core.n_cigar = l, b->core.l_qseq = j;
    b->l_data = p - b->data;
    return 0;

rmB_err:
    b->core.flag |= BAM_FUNMAP;
    return -1;
}
|
|
|
|
|
/*
 * Start position of record `b` with leading clipping taken into account.
 *
 * Adds up the lengths of the run of 'S'/'H' operations at the front of the
 * CIGAR and returns core.pos minus that total, plus one.
 */
hts_pos_t unclipped_start(bam1_t *b) {
    const uint32_t *ops = bam_get_cigar(b);
    const uint32_t n_ops = b->core.n_cigar;
    int64_t leading_clip = 0;
    uint32_t idx = 0;

    while (idx < n_ops) {
        const char opchr = bam_cigar_opchr(ops[idx]);

        /* The first non-clip operation ends the leading run. */
        if (opchr != 'S' && opchr != 'H')
            break;

        leading_clip += bam_cigar_oplen(ops[idx]);
        ++idx;
    }

    return b->core.pos - leading_clip + 1;
}
|
|
|
|
|
/*
 * Unclipped start computed from a position and a textual CIGAR string.
 *
 * op     position to adjust
 * cigar  NUL-terminated CIGAR text; parsing stops at the end of the string
 *        or at a '*'
 *
 * Sums the lengths of the leading run of 'S'/'H' operations and returns
 * op minus that sum, plus one.  An operator with no count in front of it
 * is treated as having length 1.
 */
hts_pos_t unclipped_other_start(hts_pos_t op, char *cigar) {
    char *c = cigar;
    int64_t clipped = 0;

    while (*c && *c != '*') {
        long num = 1;

        /* <ctype.h> functions need an unsigned char value; a plain char may
         * be negative. */
        if (isdigit((unsigned char)*c))
            num = strtol(c, &c, 10);

        /* Only the leading clips matter: stop at the first other operator. */
        if (*c != 'S' && *c != 'H')
            break;

        clipped += num;
        c++;
    }

    return op - clipped + 1;
}
|
|
|
|
|
/*
 * End position of record `b` with trailing clipping taken into account.
 *
 * Adds up the lengths of the run of 'S'/'H' operations at the back of the
 * CIGAR and returns bam_endpos(b) plus that total.
 */
hts_pos_t unclipped_end(bam1_t *b) {
    const uint32_t *ops = bam_get_cigar(b);
    hts_pos_t trailing_clip = 0;
    int32_t idx = b->core.n_cigar - 1;

    /* Walk backwards from the last operation while it is a clip. */
    while (idx >= 0) {
        const char opchr = bam_cigar_opchr(ops[idx]);

        if (opchr != 'S' && opchr != 'H')
            break;

        trailing_clip += bam_cigar_oplen(ops[idx]);
        --idx;
    }

    return bam_endpos(b) + trailing_clip;
}
|
|
|
|
|
|
|
/*
 * Unclipped end computed from a position and a textual CIGAR string.
 *
 * op     position to adjust
 * cigar  NUL-terminated CIGAR text; parsing stops at the end of the string
 *        or at a '*'
 *
 * Returns op plus the summed lengths of every M/D/N/=/X operation and of
 * every S/H operation that appears after the first of those.  S/H
 * operations before it are skipped, and all other operators are ignored.
 * An operator with no count in front of it is treated as having length 1.
 */
hts_pos_t unclipped_other_end(int64_t op, char *cigar) {
    char *c = cigar;
    int64_t refpos = 0;
    int skip = 1;   /* still inside the leading clips */

    while (*c && *c != '*') {
        long num = 1;

        /* <ctype.h> functions need an unsigned char value; a plain char may
         * be negative. */
        if (isdigit((unsigned char)*c)) {
            num = strtol(c, &c, 10);

            /* A count with no operator after it ends the string; stop here
             * so the c++ below cannot step past the terminator. */
            if (*c == '\0')
                break;
        }

        switch (*c) {
            case 'M':
            case 'D':
            case 'N':
            case '=':
            case 'X':
                refpos += num;
                skip = 0;
                break;

            case 'S':
            case 'H':
                if (!skip) {
                    refpos += num;
                }
                break;

            default:
                /* other operators do not contribute */
                break;
        }

        c++;
    }

    return op + refpos;
}
|
|