opentims++
opentims.h
1 /*
2  * OpenTIMS: a fully open-source library for opening Bruker's TimsTOF data files.
3  * Copyright (C) 2020 Michał Startek and Mateusz Łącki
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License, version 3 only,
7  * as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <https://www.gnu.org/licenses/>.
16  */
17 #pragma once
18 #include <cstdlib>
19 #include <cstdint>
20 #include <memory>
21 #include <string>
22 #include <iostream>
23 #include <vector>
24 #include <unordered_map>
25 
26 #include "platform.h"
27 
28 #ifndef OPENTIMS_BUILDING_R
29 #include "sqlite/sqlite3.h"
30 #endif
31 
32 #include "zstd/zstd.h"
33 
34 
35 #ifdef OPENTIMS_BUILDING_R
36 #define STRICT_R_HEADERS
37 #include "mio.h"
38 #include <Rcpp.h>
39 #else
40 #include "mio.hpp"
41 #endif
42 
43 
44 
/// Forward declaration: TimsFrame keeps a reference to, and befriends, its owning handle.
class TimsDataHandle;

/**
 * Row callback befriended by both TimsFrame and TimsDataHandle.
 *
 * The signature has the shape of an sqlite3_exec() row callback
 * (user pointer, column count, column values, column names).
 * NOTE(review): presumably invoked once per frame row while the handle reads
 * the .tdf database - confirm against the implementation file.
 *
 * @param out      Opaque user pointer supplied by the caller of the query.
 * @param cols     Number of columns in the row.
 * @param row      Column values of the row, as C strings.
 * @param colnames Column names, as C strings.
 * @return Integer status code handed back to the query driver.
 */
int tims_sql_callback(void* out, int cols, char** row, char** colnames);
48 
49 class TimsFrame
50 {
51  std::unique_ptr<char[]> back_buffer;
52 
53  char* bytes0;
54  char* bytes1;
55  char* bytes2;
56  char* bytes3;
57 
58  inline uint32_t back_data(size_t index)
59  {
60  uint32_t ret;
61  char* bytes = reinterpret_cast<char*>(&ret);
62 
63  bytes[0] = bytes0[index];
64  bytes[1] = bytes1[index];
65  bytes[2] = bytes2[index];
66  bytes[3] = bytes3[index];
67 
68  return ret;
69  }
70 
71  const char * const tims_bin_frame;
72 
73  friend class TimsDataHandle;
74  friend int tims_sql_callback(void* out, int cols, char** row, char** colnames);
75 
76  TimsDataHandle& parent_tdh;
77 
78  /* TODO: implement the below one day.
79  template void save_to_buffs_impl<bool frame_ids_present,
80  bool scan_ids_present,
81  bool intensities_present,
82  bool mzs_present,
83  bool inv_ion_mobilities_present,
84  bool retention_times_present,
85  > (uint32_t* frame_ids,
86  uint32_t* scan_ids,
87  uint32_t* tofs,
88  uint32_t* intensities,
89  double* mzs,
90  double* inv_ion_mobilities,
91  double* retention_times,
92  ZSTD_DCtx* decomp_ctx = nullptr);
93  */
94 
95  TimsFrame(uint32_t _id,
96  uint32_t _num_scans,
97  uint32_t _num_peaks,
98  uint32_t _msms_type,
99  double _intensity_correction,
100  double _time,
101  const char* frame_ptr,
102  TimsDataHandle& parent_hndl
103  );
104 
105  static TimsFrame TimsFrameFromSql(char** sql_row,
106  TimsDataHandle& parent_handle);
107 
108  inline size_t data_size_ints() const { return num_scans + num_peaks + num_peaks; };
109 
110 
111 public:
112 
113  const uint32_t id;
114  const uint32_t num_scans;
115  const uint32_t num_peaks;
116  const uint32_t msms_type;
117 private:
118  const double intensity_correction;
119  const double time;
120 
121 public:
123  void print() const;
124 
126  inline size_t data_size_bytes() const { return data_size_ints() * 4; };
127 
129 
148  void decompress(char* decompression_buffer = nullptr, ZSTD_DCtx* decomp_ctx = nullptr);
149 
151  void close();
152 
154 
168  void save_to_buffs(uint32_t* frame_ids,
169  uint32_t* scan_ids,
170  uint32_t* tofs,
171  uint32_t* intensities,
172  double* mzs,
173  double* inv_ion_mobilities,
174  double* retention_times,
175  ZSTD_DCtx* decomp_ctx = nullptr);
176 
178  void save_to_matrix_buffer(uint32_t* buf,
179  ZSTD_DCtx* decomp_ctx = nullptr)
180  { save_to_buffs(buf, buf+num_peaks, buf+2*num_peaks, buf+3*num_peaks, nullptr, nullptr, nullptr, decomp_ctx); };
181 
182  friend class TimsDataHandle;
183 };
184 
// Forward declarations of the converter types used by TimsDataHandle:
// it befriends the two Bruker* classes and holds the other two through
// std::unique_ptr, so complete definitions are not needed in this header.
class BrukerTof2MzConverter;
class Tof2MzConverter;
class BrukerScan2InvIonMobilityConverter;
class Scan2InvIonMobilityConverter;
189 
191 {
192 friend class BrukerTof2MzConverter;
193 friend class BrukerScan2InvIonMobilityConverter;
194 
195 private:
196  const std::string tims_dir_path;
197  mio::mmap_source tims_data_bin;
198  std::unordered_map<uint32_t, TimsFrame> frame_descs;
199  void read_sql(const std::string& tims_tdf_path);
200  uint32_t _min_frame_id;
201  uint32_t _max_frame_id;
202 
203  std::unique_ptr<char[]> decompression_buffer;
204 
205  std::unique_ptr<uint32_t[]> _scan_ids_buffer;
206  std::unique_ptr<uint32_t[]> _tofs_buffer;
207  std::unique_ptr<uint32_t[]> _intensities_buffer;
208 
209  ZSTD_DCtx* zstd_dctx;
210 
211 #ifndef OPENTIMS_BUILDING_R
212  sqlite3* db_conn;
213 #endif
214 
215  std::unique_ptr<Tof2MzConverter> tof2mz_converter;
216  std::unique_ptr<Scan2InvIonMobilityConverter> scan2inv_ion_mobility_converter;
217 
218  void init();
219 
220 #ifdef OPENTIMS_BUILDING_R
221  void* setupFromAnalysisList(const Rcpp::List& analysis_tdf);
222 #endif /* OPENTIMS_BUILDING_R */
223 
224  TimsDataHandle(const std::string& tims_tdf_bin_path,
225  const std::string& tims_tdf_path,
226  const std::string& tims_data_dir);
227 
228  void set_converter(std::unique_ptr<Tof2MzConverter>&& converter);
229  void set_converter(std::unique_ptr<Scan2InvIonMobilityConverter>&& converter);
230 
231 public:
232 
234 
241  TimsDataHandle(const std::string& tims_data_dir);
242 
243 #ifdef OPENTIMS_BUILDING_R
244  TimsDataHandle(const std::string& tims_data_dir, const Rcpp::List& analysis_tdf);
246 #endif /* OPENTIMS_BUILDING_R */
247 
249  ~TimsDataHandle();
250 
252  TimsFrame& get_frame(uint32_t frame_no);
253 
255  const std::unordered_map<uint32_t, TimsFrame>& get_frame_descs();
256 
258  size_t no_peaks_total() const;
259 
261 
267  size_t no_peaks_in_frames(const uint32_t indexes[],
268  size_t no_indexes);
269 
271 
276  size_t no_peaks_in_frames(const std::vector<uint32_t>& indexes)
277  { return no_peaks_in_frames(indexes.data(), indexes.size()); };
278 
280 
284  size_t no_peaks_in_slice(uint32_t start,
285  uint32_t end,
286  uint32_t step);
287 
289  uint32_t min_frame_id() const { return _min_frame_id; };
290 
292  uint32_t max_frame_id() const { return _max_frame_id; };
293 
295  bool has_frame(uint32_t frame_id) const { return frame_descs.count(frame_id) > 0; };
296 
298  void extract_frames(const uint32_t* indexes,
299  size_t no_indexes,
300  uint32_t* result);
301 
303  void extract_frames(const std::vector<uint32_t>& indexes,
304  uint32_t* result)
305  { extract_frames(indexes.data(), indexes.size(), result); };
306 
308  void extract_frames_slice(uint32_t start,
309  uint32_t end,
310  uint32_t step,
311  uint32_t* result);
312 
314  void extract_frames(const uint32_t* indexes,
315  size_t no_indexes,
316  uint32_t* frame_ids,
317  uint32_t* scan_ids,
318  uint32_t* tofs,
319  uint32_t* intensities,
320  double* mzs,
321  double* inv_ion_mobilities,
322  double* retention_times);
323 
325 
340  void extract_frames(const std::vector<uint32_t>& indexes,
341  uint32_t* frame_ids,
342  uint32_t* scan_ids,
343  uint32_t* tofs,
344  uint32_t* intensities,
345  double* mzs,
346  double* inv_ion_mobilities,
347  double* retention_times)
348  { extract_frames(indexes.data(),
349  indexes.size(),
350  frame_ids,
351  scan_ids,
352  tofs,
353  intensities,
354  mzs,
355  inv_ion_mobilities,
356  retention_times); };
357 
359 
378  void extract_frames_slice(uint32_t start,
379  uint32_t end,
380  uint32_t step,
381  uint32_t* frame_ids,
382  uint32_t* scan_ids,
383  uint32_t* tofs,
384  uint32_t* intensities,
385  double* mzs,
386  double* inv_ion_mobilities,
387  double* retention_times);
388 
389  void allocate_buffers();
390 
391  inline void ensure_buffers_allocated() { if(_scan_ids_buffer) return; allocate_buffers(); };
392 
393  void free_buffers();
394 
396  size_t max_peaks_in_frame();
397 
399  size_t expose_frame(size_t frame_id);
400 
402  const std::unique_ptr<uint32_t[]>& scan_ids_buffer() { return _scan_ids_buffer; };
403 
405  const std::unique_ptr<uint32_t[]>& tofs_buffer() { return _tofs_buffer; };
406 
408  const std::unique_ptr<uint32_t[]>& intensities_buffer() { return _intensities_buffer; };
409 
410 // const sqlite3* db_connection() { return db_conn; };
411 
412  friend int tims_sql_callback(void* out, int cols, char** row, char** colnames);
413 
414  friend class TimsFrame;
415 };
TimsDataHandle::no_peaks_in_slice
size_t no_peaks_in_slice(uint32_t start, uint32_t end, uint32_t step)
Count the peaks in a subset of frames, selected by a slice.
TimsDataHandle::max_peaks_in_frame
size_t max_peaks_in_frame()
Return the maximal number of peaks in the biggest frame in this dataset.
TimsFrame
Definition: opentims.h:49
TimsFrame::save_to_matrix_buffer
void save_to_matrix_buffer(uint32_t *buf, ZSTD_DCtx *decomp_ctx=nullptr)
This function is deprecated and intentionally undocumented; do not use.
Definition: opentims.h:178
TimsDataHandle::no_peaks_in_frames
size_t no_peaks_in_frames(const std::vector< uint32_t > &indexes)
Count the peaks in a subset of frames.
Definition: opentims.h:276
TimsFrame::num_peaks
const uint32_t num_peaks
Number of peaks this frame contains (summed across all scans)
Definition: opentims.h:115
TimsFrame::msms_type
const uint32_t msms_type
The MS/MS type of this frame.
Definition: opentims.h:116
TimsDataHandle::intensities_buffer
const std::unique_ptr< uint32_t[]> & intensities_buffer()
Experimental API - use discouraged.
Definition: opentims.h:408
TimsDataHandle::max_frame_id
uint32_t max_frame_id() const
Access the highest id of a valid frame from this dataset.
Definition: opentims.h:292
TimsFrame::data_size_bytes
size_t data_size_bytes() const
Return the size of back buffer needed to store raw TIMS data.
Definition: opentims.h:126
TimsDataHandle::no_peaks_total
size_t no_peaks_total() const
Returns the total number of MS peaks in this handle.
TimsDataHandle::extract_frames_slice
void extract_frames_slice(uint32_t start, uint32_t end, uint32_t step, uint32_t *result)
This function is deprecated, and left deliberately undocumented; do not use.
TimsDataHandle::scan_ids_buffer
const std::unique_ptr< uint32_t[]> & scan_ids_buffer()
Experimental API - use discouraged.
Definition: opentims.h:402
TimsDataHandle::tofs_buffer
const std::unique_ptr< uint32_t[]> & tofs_buffer()
Experimental API - use discouraged.
Definition: opentims.h:405
TimsFrame::decompress
void decompress(char *decompression_buffer=nullptr, ZSTD_DCtx *decomp_ctx=nullptr)
Precalculates and memorizes all the information contained within this frame.
TimsFrame::save_to_buffs
void save_to_buffs(uint32_t *frame_ids, uint32_t *scan_ids, uint32_t *tofs, uint32_t *intensities, double *mzs, double *inv_ion_mobilities, double *retention_times, ZSTD_DCtx *decomp_ctx=nullptr)
Retrieve the MS peak data held by the frame.
TimsFrame::num_scans
const uint32_t num_scans
Number of scans this frame contains.
Definition: opentims.h:114
TimsDataHandle::expose_frame
size_t expose_frame(size_t frame_id)
Experimental API - use discouraged.
TimsDataHandle
Definition: opentims.h:190
TimsDataHandle::extract_frames
void extract_frames(const uint32_t *indexes, size_t no_indexes, uint32_t *result)
This function is deprecated, and left deliberately undocumented; do not use.
TimsFrame::print
void print() const
Prints out to stdout a short summary of this frame.
TimsDataHandle::no_peaks_in_frames
size_t no_peaks_in_frames(const uint32_t indexes[], size_t no_indexes)
Count the peaks in a subset of frames.
TimsFrame::close
void close()
Releases the storage taken by decompress() method, without destroying the frame. Will be called in de...
TimsDataHandle::~TimsDataHandle
~TimsDataHandle()
Close and deallocate the TimsTOF data handle (destructor).
TimsDataHandle::get_frame_descs
const std::unordered_map< uint32_t, TimsFrame > & get_frame_descs()
Access a dictionary containing all the frames from this dataset, keyed by ID.
TimsDataHandle::extract_frames
void extract_frames(const std::vector< uint32_t > &indexes, uint32_t *result)
This function is deprecated, and left deliberately undocumented; do not use.
Definition: opentims.h:303
TimsDataHandle::has_frame
bool has_frame(uint32_t frame_id) const
Check whether a frame with provided ID exists in the dataset.
Definition: opentims.h:295
TimsDataHandle::min_frame_id
uint32_t min_frame_id() const
Access the lowest id of a valid frame from this dataset.
Definition: opentims.h:289
TimsDataHandle::get_frame
TimsFrame & get_frame(uint32_t frame_no)
Access a single frame by its ID.
TimsFrame::id
const uint32_t id
ID of the frame.
Definition: opentims.h:113
TimsDataHandle::extract_frames
void extract_frames(const std::vector< uint32_t > &indexes, uint32_t *frame_ids, uint32_t *scan_ids, uint32_t *tofs, uint32_t *intensities, double *mzs, double *inv_ion_mobilities, double *retention_times)
Extract a subset of frames, selected by indexes, filling provided buffers with MS peak data.
Definition: opentims.h:340