libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pappso::BafAsciiFileReader Class Reference

#include <bafasciifilereader.h>

Inheritance diagram for pappso::BafAsciiFileReader:
pappso::MsFileReader

Public Member Functions

 BafAsciiFileReader (const QString &file_name)
 
virtual ~BafAsciiFileReader ()
 
virtual MsDataFormat getFileFormat () override
 
virtual std::vector< MsRunIdCstSPtr > getMsRunIds (const QString &run_prefix) override
 
MsRunReader * selectMsRunReader (const QString &file_name) const
 

Private Member Functions

virtual bool initialize (std::size_t &line_count)
 
- Private Member Functions inherited from pappso::MsFileReader
 MsFileReader (const QString &file_name)
 
virtual ~MsFileReader ()
 

Additional Inherited Members

- Private Attributes inherited from pappso::MsFileReader
QString m_fileName
 
MsDataFormat m_fileFormat = MsDataFormat::unknown
 

Detailed Description

Definition at line 15 of file bafasciifilereader.h.

Constructor & Destructor Documentation

◆ BafAsciiFileReader()

pappso::BafAsciiFileReader::BafAsciiFileReader ( const QString &  file_name)

Definition at line 29 of file bafasciifilereader.cpp.

30 : MsFileReader{file_name}
31{
32 // To avoid initializing multiple times (costly process), we
33 // only initialize when needed, that is, upon getMsRunIds().
34 // initialize();
35}
MsFileReader(const QString &file_name)

◆ ~BafAsciiFileReader()

pappso::BafAsciiFileReader::~BafAsciiFileReader ( )
virtual

Definition at line 38 of file bafasciifilereader.cpp.

39{
40}

Member Function Documentation

◆ getFileFormat()

MsDataFormat pappso::BafAsciiFileReader::getFileFormat ( )
override virtual

Implements pappso::MsFileReader.

Definition at line 265 of file bafasciifilereader.cpp.

266{
267 return m_fileFormat;
268}
MsDataFormat m_fileFormat

References pappso::MsFileReader::m_fileFormat.

Referenced by pappso::MsFileAccessor::getMsRunIds().

◆ getMsRunIds()

std::vector< MsRunIdCstSPtr > pappso::BafAsciiFileReader::getMsRunIds ( const QString &  run_prefix)
override virtual

Implements pappso::MsFileReader.

Definition at line 272 of file bafasciifilereader.cpp.

273{
274 std::vector<MsRunIdCstSPtr> ms_run_ids;
275
276 std::size_t ms_data_line_count = 0;
277
278 if(!initialize(ms_data_line_count))
279 return ms_run_ids;
280
281 // Finally create the MsRunId with the file name.
282 MsRunId ms_run_id(m_fileName);
283 ms_run_id.setMsDataFormat(m_fileFormat);
284
285 // We need to set the unambiguous xmlId string.
286 ms_run_id.setXmlId(
287 QString("%1%2").arg(run_prefix).arg(Utils::getLexicalOrderedString(0)));
288
289 // Craft a meaningful sample name because otherwise all the files loaded from
290 // text files will have the same sample name and it will be difficult to
291 // differentiate them.
292 // Orig version:
293 // ms_run_id.setRunId("Single spectrum");
294 // Now the sample name is nothing but the file name without the path.
295
296 QFileInfo file_info(m_fileName);
297
298 // qDebug() << "file name:" << m_fileName;
299
300 QString sample_name = file_info.fileName();
301
302 // qDebug() << "sample name:" << sample_name;
303
304 ms_run_id.setRunId(sample_name);
305
306 // Now set the sample name to the run id:
307
308 ms_run_id.setSampleName(ms_run_id.getRunId());
309
310 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
311 //<< "Current ms_run_id:" << ms_run_id.toString();
312
313 // Finally make a shared pointer out of it and append it to the vector.
314 ms_run_ids.push_back(std::make_shared<MsRunId>(ms_run_id));
315
316 return ms_run_ids;
317}
virtual bool initialize(std::size_t &line_count)
static const QString getLexicalOrderedString(unsigned int num)
Definition utils.cpp:74

References pappso::Utils::getLexicalOrderedString(), pappso::MsRunId::getRunId(), initialize(), pappso::MsFileReader::m_fileFormat, pappso::MsFileReader::m_fileName, pappso::MsRunId::setMsDataFormat(), pappso::MsRunId::setRunId(), pappso::MsRunId::setSampleName(), and pappso::MsRunId::setXmlId().

Referenced by pappso::MsFileAccessor::getMsRunIds().

◆ initialize()

bool pappso::BafAsciiFileReader::initialize ( std::size_t &  line_count)
private virtual

Definition at line 43 of file bafasciifilereader.cpp.

44{
45 // Here we just test some of the lines of the file to check that they comply with
46 // the brukerBafAscii format.
47
48 line_count = 0;
49
50 QFile file(m_fileName);
51
52 if(!file.open(QFile::ReadOnly | QFile::Text))
53 {
54 qDebug() << "Failed to open file" << m_fileName;
55
56 return false;
57 }
58
59 // Construct the regular expression pattern, piecemeal...
60
61 // The retention time as the very first value in the line.
62
63 QString regexp_pattern = QString("^(%1)").arg(
64 Utils::unsignedDoubleNumberNoExponentialRegExp.pattern());
65
66 // The ionization mode (positive or negative)
67 regexp_pattern += QString(",([+-])");
68
69 regexp_pattern += QString(",(ESI|MALDI)");
70
71 // The MS level (ms1 for full scan mass spectrum)
72 regexp_pattern += QString(",ms(\\d)");
73
74 // Do not know what this is for.
75 regexp_pattern += QString(",(-)");
76
77 // The type of peak (profile or centroid).
78 regexp_pattern += QString(",(profile|line)");
79
80 // The m/z range of the mass spectrum.
81
82 regexp_pattern +=
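83 QString(",(%1-%2)")
84 .arg(Utils::unsignedDoubleNumberNoExponentialRegExp.pattern())
85 .arg(Utils::unsignedDoubleNumberNoExponentialRegExp.pattern());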
86
87 // The count of peaks following this element in the remaining of the line.
88
89 regexp_pattern += QString(",(\\d+)");
90
91 regexp_pattern += QString("(.*$)");
92
93 // qDebug() << "The full regexp_pattern:" << regexp_pattern;
94
95 QRegularExpression line_regexp(regexp_pattern);
96
97 QRegularExpressionMatch regexp_match;
98
99 QString line;
100 bool file_reading_failed = false;
101 bool ok = false;
102
103 // Reading, parsing and checking lines is extremely time consuming.
104 // What we want here is reduce the time all the file's lines are
105 // read. We could say that we want to parse and check the first
106 // CHECKED_LINES_COUNT lines and then avoid parsing and checking, just go
107 // through the lines. At the end of the file, the number of lines that have
108 // been read is stored in the out parameter line_count.
109 std::size_t iter = 0;
110
111 while(!file.atEnd())
112 {
113 line = file.readLine().trimmed();
114
115 ++iter;
116 // qDebug() << "Read one line more: (not yet checked)" << iter;
117 if(iter > CHECKED_LINES_COUNT)
118 continue;
119
120 if(line.startsWith('#') || line.isEmpty() ||
121 Utils::endOfLineRegExp.match(line).hasMatch())
122 continue;
123
124 // qDebug() << "Current brukerBafAscii format line " << line_count << ": "
125 // << line.left(30) << " ... " << line.right(30);
126
127 regexp_match = line_regexp.match(line);
128
129 if(regexp_match.hasMatch())
130 {
131 // qDebug() << "The match succeeded.";
132
133 double retention_time = regexp_match.captured(1).toDouble(&ok);
134 if(!ok)
135 {
136 qDebug()
137 << "Failed to extract the retention time of the mass spectrum.";
138
139 file_reading_failed = true;
140
141 break;
142 }
143
144 QString ionization_mode = regexp_match.captured(2);
145 QString source_type = regexp_match.captured(3);
146
147 int ms_level = regexp_match.captured(4).toInt(&ok);
148 if(!ok)
149 {
150 qDebug()
151 << "Failed to extract the MS level of the mass spectrum.";
152
153 file_reading_failed = true;
154
155 break;
156 }
157
158 QString peak_shape_type = regexp_match.captured(6);
159
160 QString mz_range = regexp_match.captured(7);
161
162 double mz_range_start =
163 mz_range.left(mz_range.indexOf("-")).toDouble(&ok);
164 if(!ok)
165 {
166 qDebug() << "Failed to extract the start of the m/z range.";
167
168 file_reading_failed = true;
169
170 break;
171 }
172
173 double mz_range_end =
174 mz_range.right(mz_range.indexOf("-") + 1).toDouble(&ok);
175 if(!ok)
176 {
177 qDebug() << "Failed to extract the end of the m/z range.";
178
179 file_reading_failed = true;
180
181 break;
182 }
183
184 // qDebug() << qSetRealNumberPrecision(10)
185 // << "mz_range_start: " << mz_range_start
186 // << "mz_range_end: " << mz_range_end;
187
188 int peak_count = regexp_match.captured(8).toInt(&ok);
189 if(!ok)
190 {
191 qDebug() << "Failed to extract the number of peaks in the mass "
192 "spectrum.";
193
194 file_reading_failed = true;
195
196 break;
197 }
198
199 QString peaks = regexp_match.captured(9);
200 QStringList peaks_stringlist = peaks.split(",", Qt::SkipEmptyParts);
201
202 // qDebug() << "The number of peaks:" << peaks_stringlist.size();
203
204 // Sanity check:
205 if(peaks_stringlist.size() != peak_count)
206 {
207 // qDebug() << "The number of peaks in the mass spectrum does not
208 // "
209 // "match the advertised one.";
210
211 file_reading_failed = true;
212
213 break;
214 }
215
216 // qDebug() << "The retention time:" << retention_time
217 // << "the ionization mode: " << ionization_mode
218 // << "the source type: " << source_type
219 // << "MS level is:" << ms_level
220 // << "peak shape type: " << peak_shape_type
221 // << "m/z range: " << mz_range << "peak count: " <<
222 // peak_count
223 // << "and peaks: " << peaks.left(100) << " ... "
224 // << peaks.right(100) << "";
225
226 // If we are here, that means that the read line has conformed
227 // to the format expected.
228 ++line_count;
229 // qDebug() << "Checked one line more:" << line_count;
230 }
231 // End of
232 // if(regexp_match.hasMatch())
233 else
234 {
235 qDebug() << "The match failed.";
236 file_reading_failed = true;
237
238 break;
239 }
240 }
241 // End of
242 // while(!file.atEnd())
243
244 file.close();
245
246 if(!file_reading_failed && line_count >= 1)
247 {
248 m_fileFormat = MsDataFormat::brukerBafAscii;
249 return true;
250 }
251
252 m_fileFormat = MsDataFormat::unknown;
253
254 // qDebug() << "The number of parsed mass spectra: " << line_count;
255
256 // qDebug() << "Detected file format:"
257 // << Utils::msDataFormatAsString(m_fileFormat)
258 // << "with number of spectra: " << line_count;
259
260 return false;
261}
static QRegularExpression unsignedDoubleNumberNoExponentialRegExp
Definition utils.h:53
static QRegularExpression endOfLineRegExp
Regular expression that tracks the end of line in text files.
Definition utils.h:68
@ unknown
unknown format
static const std::size_t CHECKED_LINES_COUNT

References pappso::brukerBafAscii, pappso::CHECKED_LINES_COUNT, pappso::Utils::endOfLineRegExp, line, pappso::MsFileReader::m_fileFormat, pappso::MsFileReader::m_fileName, pappso::unknown, and pappso::Utils::unsignedDoubleNumberNoExponentialRegExp.

Referenced by getMsRunIds().

◆ selectMsRunReader()

MsRunReader * pappso::BafAsciiFileReader::selectMsRunReader ( const QString &  file_name) const

The documentation for this class was generated from the following files: