Re: Help needed: Bug#962292: ITP: flye -- de novo assembler for single molecule sequencing reads using repeat graphs
Hi Steffen,
On Sat, Jun 06, 2020 at 03:41:44PM +0200, Steffen Möller wrote:
> > $ /usr/bin/minimap2 out_pacbio/10-consensus/chunks.fasta E.coli_PacBio_40x.fasta -x map-pb -t 4 -a -p 0.5 -N 10 --sam-hit-only -L -Q --secondary-seq
> > [ERROR] unknown option in "E.coli_PacBio_40x.fasta"
> > Exit code: 1
>
> A look at -h and the .1 suggests that the order of arguments is
> unfortunate. Options should be first. Please kindly try
>
> /usr/bin/minimap2 -x map-pb -t 4 -a -p 0.5 -N 10 --sam-hit-only -L -Q out_pacbio/10-consensus/chunks.fasta E.coli_PacBio_40x.fasta
>
> and then add the "--secondary-seq" argument and see it fail.
>
> At https://github.com/fenderglass/Flye/issues/249 I get the impression that the --secondary-seq is not an error and at
> https://github.com/fenderglass/Flye/blob/b0107d8a4806a42aa8a16667bb1abb93ab9afd56/lib/minimap2/main.c#L69
> I see it implemented which I do not see in the original
> https://github.com/lh3/minimap2/blob/master/main.c
>
> I do not have an immediate answer to what to do now. Upstream calls their minimap "flye-minimap". So, maybe embedding that binary with the flye binary may indeed be an option.
The suspicion that the minimap2 code copy in flye is patched. I have
attached the diff between the Debian packaged minimap2 which actually
introduces --secondary-seq.
I wonder if there is some volunteer to talk to both upstreams to
merge the patch.
Kind regards
Andreas.
--
http://fam-tille.de
diff -ubrN minimap2/format.c flye-minimap2/format.c
--- minimap2/format.c 2019-08-01 15:06:53.638518907 +0200
+++ flye-minimap2/format.c 2020-04-24 22:34:42.000000000 +0200
@@ -367,14 +367,18 @@
clip_len[0] = r->rev? qlen - r->qe : r->qs;
clip_len[1] = r->rev? r->qs : qlen - r->qe;
if (in_tag) {
- int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 5 : 4;
+ //int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 5 : 4;
+ int clip_char = ((sam_flag&0x800 || (sam_flag&0x100 && opt_flag&MM_F_SECONDARY_SEQ)) &&
+ !(opt_flag&MM_F_SOFTCLIP)) ? 5 : 4;
mm_sprintf_lite(s, "\tCG:B:I");
if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char);
for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, ",%u", r->p->cigar[k]);
if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char);
} else {
- int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 'H' : 'S';
+ //int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 'H' : 'S';
+ int clip_char = ((sam_flag&0x800 || (sam_flag&0x100 && opt_flag&MM_F_SECONDARY_SEQ)) &&
+ !(opt_flag&MM_F_SOFTCLIP)) ? 'H' : 'S';
assert(clip_len[0] < qlen && clip_len[1] < qlen);
if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
for (k = 0; k < r->p->n_cigar; ++k)
@@ -449,7 +453,7 @@
if (cigar_in_tag) {
int slen;
if ((flag & 0x900) == 0 || (opt_flag & MM_F_SOFTCLIP)) slen = t->l_seq;
- else if (flag & 0x100) slen = 0;
+ else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)) slen = 0;
else slen = r->qe - r->qs;
mm_sprintf_lite(s, "%dS%dN", slen, r->re - r->rs);
} else write_sam_cigar(s, flag, 0, t->l_seq, r, opt_flag);
@@ -490,7 +494,7 @@
mm_sprintf_lite(s, "\t");
if (t->qual) sam_write_sq(s, t->qual, t->l_seq, r->rev, 0);
else mm_sprintf_lite(s, "*");
- } else if (flag & 0x100) {
+ } else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)){
mm_sprintf_lite(s, "*\t*");
} else {
sam_write_sq(s, t->seq + r->qs, r->qe - r->qs, r->rev, r->rev);
diff -ubrN minimap2/main.c flye-minimap2/main.c
--- minimap2/main.c 2019-08-01 15:06:53.638518907 +0200
+++ flye-minimap2/main.c 2020-04-24 22:34:42.000000000 +0200
@@ -66,6 +66,7 @@
{ "junc-bed", ko_required_argument, 340 },
{ "junc-bonus", ko_required_argument, 341 },
{ "sam-hit-only", ko_no_argument, 342 },
+ { "secondary-seq", ko_no_argument, 343 },
{ "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' },
@@ -209,6 +210,7 @@
else if (c == 338) opt.max_qlen = mm_parse_num(o.arg); // --max-qlen
else if (c == 340) junc_bed = o.arg; // --junc-bed
else if (c == 342) opt.flag |= MM_F_SAM_HIT_ONLY; // --sam-hit-only
+ else if (c == 343) opt.flag |= MM_F_SECONDARY_SEQ; // --secondary-seq
else if (c == 314) { // --frag
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
} else if (c == 315) { // --secondary
diff -ubrN minimap2/minimap.h flye-minimap2/minimap.h
--- minimap2/minimap.h 2019-08-01 15:06:53.642518911 +0200
+++ flye-minimap2/minimap.h 2020-04-24 22:34:42.000000000 +0200
@@ -36,6 +36,7 @@
#define MM_F_NO_END_FLT 0x10000000
#define MM_F_HARD_MLEVEL 0x20000000
#define MM_F_SAM_HIT_ONLY 0x40000000
+#define MM_F_SECONDARY_SEQ 0x80000000 //output SEQ field for seqondary alignments using hard clipping
#define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2
Reply to: