Skip to content

Commit 05a0db9

Browse files
committed
fix failing test and remove unnecessary build
AppVeyor keeps failing individual tests with a permission error; the cause is still unknown.
1 parent 359c4f7 commit 05a0db9

File tree

18 files changed

+426
-229
lines changed

18 files changed

+426
-229
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "seqtool"
3-
version = "0.2.3"
3+
version = "0.2.1"
44
authors = ["<[email protected]>"]
55
description = "General purpose tool for reading, modifying and writing biological sequences."
66
license = "MIT"
@@ -46,4 +46,4 @@ exprtk = ["exprtk_rs"]
4646

4747
[profile.release]
4848
lto = true
49-
#panic = "abort"
49+
panic = "abort" # ~ 10% faster

ci/script.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,13 @@ set -ex
44

55
main() {
66
cross build --features=exprtk --target $TARGET
7-
cross build --features=exprtk --target $TARGET --release
87

98
if [ ! -z $DISABLE_TESTS ]; then
109
return
1110
fi
1211

1312
cross test --features=exprtk --target $TARGET
14-
cross test --features=exprtk --target $TARGET --release
15-
1613
cross run --features=exprtk --target $TARGET
17-
cross run --features=exprtk --target $TARGET --release
1814
}
1915

2016
# we don't run the "test phase" when doing deploys

scripts/time.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ time seqkit fq2fa $f > /dev/null
3333
# random subsampling
3434
time s sample -f 0.1 $f > /dev/null
3535
time seqtk sample $f 0.1 > /dev/null
36-
time seqkit sample -a 0.1 $f > /dev/null
36+
time seqkit sample -p 0.1 $f > /dev/null
3737

3838
# counting
3939
time s count $f

src/cfg.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@ impl<'a> Config<'a> {
110110
input::io_readers(&self.input_opts, |in_opts, rdr| {
111111
vars.new_input(in_opts)?;
112112
input::run_reader(&in_opts.format, rdr, in_opts.cap, in_opts.max_mem, |rec| {
113-
let rec = DefRecord::from_rec(rec);
114113
vars.set_record(&rec)?;
115114
func(&rec, vars)
116115
})

src/cmd/replace.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::borrow::ToOwned;
22
use std::str;
33

44
use regex;
5+
use memchr::Memchr;
56

67
use lib::twoway_iter::TwowayIter;
78
use error::CliResult;
@@ -10,6 +11,7 @@ use io::{SeqAttr, RecordEditor};
1011
use cfg;
1112
use lib::util::replace_iter;
1213

14+
1315
static USAGE: &'static str = concat!("
1416
This command does fast search and replace for patterns in sequences
1517
or ids/descriptions.
@@ -59,8 +61,13 @@ pub fn run() -> CliResult<()> {
5961
run_replace(&cfg, attr, replacement, replacer, num_threads)?;
6062
}
6163
} else {
62-
let replacer = BytesReplacer(pattern.as_bytes().to_owned());
63-
run_replace(&cfg, attr, replacement, replacer, num_threads)?;
64+
if pattern.len() == 1 {
65+
let replacer = SingleByteReplacer(pattern.as_bytes()[0]);
66+
run_replace(&cfg, attr, replacement, replacer, num_threads)?;
67+
} else {
68+
let replacer = BytesReplacer(pattern.as_bytes().to_owned());
69+
run_replace(&cfg, attr, replacement, replacer, num_threads)?;
70+
}
6471
}
6572
Ok(())
6673
}
@@ -94,6 +101,17 @@ trait Replacer {
94101
fn replace(&self, text: &[u8], replacement: &[u8], out: &mut Vec<u8>) -> CliResult<()>;
95102
}
96103

104+
struct SingleByteReplacer(u8);
105+
106+
impl Replacer for SingleByteReplacer {
107+
fn replace(&self, text: &[u8], replacement: &[u8], out: &mut Vec<u8>) -> CliResult<()> {
108+
let matches = Memchr::new(self.0, text).map(|start| (start, start + 1));
109+
replace_iter(text, replacement, out, matches);
110+
Ok(())
111+
}
112+
}
113+
114+
97115
struct BytesReplacer(Vec<u8>);
98116

99117
impl Replacer for BytesReplacer {
@@ -112,7 +130,7 @@ impl Replacer for BytesRegexReplacer {
112130
let matches = self.0.find_iter(text).map(|m| (m.start(), m.end()));
113131
replace_iter(text, replacement, out, matches);
114132
} else {
115-
// slower, requires allocations
133+
// requires allocations
116134
let replaced = self.0.replace_all(text, replacement);
117135
out.extend_from_slice(&replaced);
118136
}
@@ -129,7 +147,7 @@ impl Replacer for RegexReplacer {
129147
let matches = self.0.find_iter(string).map(|m| (m.start(), m.end()));
130148
replace_iter(text, replacement, out, matches);
131149
} else {
132-
// slower, requires allocations
150+
// requires allocations
133151
let replacement = str::from_utf8(replacement)?;
134152
let replaced = self.0.replace_all(string, replacement);
135153
out.extend_from_slice(replaced.as_bytes());

src/io/csv.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::io;
22
use std::convert::AsRef;
33
use std::collections::HashMap;
4-
use std::borrow::ToOwned;
4+
use std::borrow::{Cow,ToOwned};
55

66
use csv;
77
use lib::util::match_fields;
@@ -168,10 +168,19 @@ impl Record for CsvRecord {
168168
(self.id_bytes(), self.desc_bytes())
169169
}
170170

171+
fn get_header(&self) -> SeqHeader {
172+
let (id, desc) = self.id_desc_bytes();
173+
SeqHeader::IdDesc(id, desc)
174+
}
175+
171176
fn raw_seq(&self) -> &[u8] {
172177
self.data.get(self.cols.seq_col).unwrap_or(b"")
173178
}
174179

180+
fn has_seq_lines(&self) -> bool {
181+
false
182+
}
183+
175184
fn qual(&self) -> Option<&[u8]> {
176185
self.cols.qual_col.map(|i| self.data.get(i).unwrap_or(b""))
177186
}
@@ -180,5 +189,3 @@ impl Record for CsvRecord {
180189
to.extend_from_slice(self.raw_seq())
181190
}
182191
}
183-
184-
// Writer

src/io/fasta.rs

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,76 @@
1+
12
use std::io;
3+
use std::borrow::{Cow,ToOwned};
4+
use std::cell::Cell;
5+
6+
use memchr::memchr;
27

38
use error::CliResult;
4-
use seq_io::fasta;
9+
use seq_io::fasta::{self, Record as FR};
510
use super::*;
611

7-
// Record
12+
// Wrapper for FASTA record
813

9-
impl<'a> Record for fasta::RefRecord<'a> {
10-
fn id_bytes(&self) -> &[u8] {
11-
<fasta::RefRecord as fasta::Record>::id_bytes(self)
14+
pub struct FastaRecord<'a> {
15+
rec: fasta::RefRecord<'a>,
16+
delim: Cell<Option<Option<usize>>>
17+
}
18+
19+
impl<'a> FastaRecord<'a> {
20+
#[inline(always)]
21+
pub fn new(inner: fasta::RefRecord<'a>) -> FastaRecord<'a> {
22+
FastaRecord {
23+
rec: inner,
24+
delim: Cell::new(None)
25+
}
1226
}
1327

14-
fn desc_bytes(&self) -> Option<&[u8]> {
15-
<fasta::RefRecord as fasta::Record>::desc_bytes(self)
28+
#[inline(always)]
29+
fn _get_header(&self) -> (&[u8], Option<&[u8]>) {
30+
if let Some(d) = self.delim.get() {
31+
if let Some(d) = d {
32+
let (id, desc) = self.rec.head().split_at(d);
33+
(id, Some(&desc[1..]))
34+
} else {
35+
(self.rec.head(), None)
36+
}
37+
} else {
38+
self.delim.set(Some(memchr(b' ', self.rec.head())));
39+
self._get_header()
40+
}
1641
}
42+
}
1743

44+
impl<'a> Record for FastaRecord<'a> {
45+
fn id_bytes(&self) -> &[u8] {
46+
self._get_header().0
47+
}
48+
fn desc_bytes(&self) -> Option<&[u8]> {
49+
self._get_header().1
50+
}
1851
fn id_desc_bytes(&self) -> (&[u8], Option<&[u8]>) {
19-
<fasta::RefRecord as fasta::Record>::id_desc_bytes(self)
52+
self._get_header()
53+
}
54+
fn delim(&self) -> Option<Option<usize>> {
55+
self.delim.get()
56+
}
57+
fn set_delim(&self, delim: Option<usize>) {
58+
self.delim.set(Some(delim))
59+
}
60+
fn get_header(&self) -> SeqHeader {
61+
SeqHeader::FullHeader(self.rec.head())
2062
}
21-
2263
fn raw_seq(&self) -> &[u8] {
23-
<fasta::RefRecord as fasta::Record>::seq(self)
64+
self.rec.seq()
65+
}
66+
fn has_seq_lines(&self) -> bool {
67+
self.rec.num_seq_lines() > 1
2468
}
25-
2669
fn qual(&self) -> Option<&[u8]> {
2770
None
2871
}
29-
3072
fn seq_segments(&self) -> SeqLineIter {
31-
SeqLineIter::Fasta(self.seq_lines())
73+
SeqLineIter::Fasta(self.rec.seq_lines())
3274
}
3375
}
3476

@@ -48,13 +90,16 @@ impl<W: io::Write> FastaWriter<W> {
4890
}
4991
}
5092

93+
5194
impl<W: io::Write> SeqWriter for FastaWriter<W> {
52-
fn write(&mut self, id: &[u8], desc: Option<&[u8]>, record: &Record) -> CliResult<()> {
95+
fn write(&mut self, record: &Record) -> CliResult<()> {
96+
match record.get_header() {
97+
SeqHeader::IdDesc(id, desc) => fasta::write_id_desc(&mut self.io_writer, id, desc)?,
98+
SeqHeader::FullHeader(h) => fasta::write_head(&mut self.io_writer, h)?,
99+
}
53100
if let Some(wrap) = self.wrap {
54-
fasta::write_id_desc(&mut self.io_writer, id, desc)?;
55101
fasta::write_wrap_seq_iter(&mut self.io_writer, record.seq_segments(), wrap)?;
56102
} else {
57-
fasta::write_id_desc(&mut self.io_writer, id, desc)?;
58103
fasta::write_seq_iter(&mut self.io_writer, record.seq_segments())?;
59104
}
60105
Ok(())

src/io/fastq.rs

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,77 @@
1+
12
use std::io;
23
use error::CliResult;
4+
use std::borrow::Cow;
5+
use std::cell::Cell;
6+
7+
use memchr::memchr;
38

4-
use seq_io::fastq;
9+
use seq_io::fastq::{self, Record as FR};
510
use super::*;
611

7-
// Record
12+
// Wrapper for FASTQ record
813

9-
impl<'a> Record for fastq::RefRecord<'a> {
10-
fn id_bytes(&self) -> &[u8] {
11-
<fastq::RefRecord as fastq::Record>::id_bytes(self)
14+
pub struct FastqRecord<'a> {
15+
rec: fastq::RefRecord<'a>,
16+
delim: Cell<Option<Option<usize>>>
17+
}
18+
19+
impl<'a> FastqRecord<'a> {
20+
#[inline(always)]
21+
pub fn new(inner: fastq::RefRecord<'a>) -> FastqRecord<'a> {
22+
FastqRecord {
23+
rec: inner,
24+
delim: Cell::new(None)
25+
}
1226
}
1327

14-
fn desc_bytes(&self) -> Option<&[u8]> {
15-
<fastq::RefRecord as fastq::Record>::desc_bytes(self)
28+
#[inline(always)]
29+
fn _get_header(&self) -> (&[u8], Option<&[u8]>) {
30+
if let Some(d) = self.delim.get() {
31+
if let Some(d) = d {
32+
let (id, desc) = self.rec.head().split_at(d);
33+
(id, Some(&desc[1..]))
34+
} else {
35+
(self.rec.head(), None)
36+
}
37+
} else {
38+
self.delim.set(Some(memchr(b' ', self.rec.head())));
39+
self._get_header()
40+
}
1641
}
42+
}
1743

44+
impl<'a> Record for FastqRecord<'a> {
45+
fn id_bytes(&self) -> &[u8] {
46+
self._get_header().0
47+
}
48+
fn desc_bytes(&self) -> Option<&[u8]> {
49+
self._get_header().1
50+
}
1851
fn id_desc_bytes(&self) -> (&[u8], Option<&[u8]>) {
19-
<fastq::RefRecord as fastq::Record>::id_desc_bytes(self)
52+
self._get_header()
53+
}
54+
fn delim(&self) -> Option<Option<usize>> {
55+
self.delim.get()
56+
}
57+
fn set_delim(&self, delim: Option<usize>) {
58+
self.delim.set(Some(delim))
59+
}
60+
fn get_header(&self) -> SeqHeader {
61+
SeqHeader::FullHeader(self.rec.head())
2062
}
21-
2263
fn raw_seq(&self) -> &[u8] {
23-
<fastq::RefRecord as fastq::Record>::seq(self)
64+
self.rec.seq()
2465
}
25-
26-
fn qual(&self) -> Option<&[u8]> {
27-
Some(<fastq::RefRecord as fastq::Record>::qual(self))
66+
fn has_seq_lines(&self) -> bool {
67+
false
2868
}
29-
30-
fn write_seq(&self, to: &mut Vec<u8>) {
31-
to.extend_from_slice(self.raw_seq())
69+
fn qual(&self) -> Option<&[u8]> {
70+
Some(<fastq::RefRecord as fastq::Record>::qual(&self.rec))
3271
}
3372
}
3473

74+
3575
// Writer
3676

3777
pub struct FastqWriter<W: io::Write>(W);
@@ -43,13 +83,17 @@ impl<W: io::Write> FastqWriter<W> {
4383
}
4484

4585
impl<W: io::Write> SeqWriter for FastqWriter<W> {
46-
fn write(&mut self, id: &[u8], desc: Option<&[u8]>, record: &Record) -> CliResult<()> {
86+
fn write(&mut self, record: &Record) -> CliResult<()> {
4787
let qual = record.qual().ok_or("Qualities missing!")?;
4888
// Using .raw_seq() is possible only because FASTA cannot be used as input source
49-
// (no quality info). Might become a problem if getting the quality info from other sources
89+
// (no quality info). Might change if getting the quality info from other sources
5090
// (mothur-style .qual files)
5191
let seq = record.raw_seq();
52-
fastq::write_parts(&mut self.0, id, desc, seq, qual)?;
92+
93+
match record.get_header() {
94+
SeqHeader::IdDesc(id, desc) => fastq::write_parts(&mut self.0, id, desc, seq, qual)?,
95+
SeqHeader::FullHeader(h) => fastq::write_to(&mut self.0, h, seq, qual)?,
96+
}
5397
Ok(())
5498
}
5599
}

0 commit comments

Comments
 (0)