Coverage for peakipy/io.py: 93%
509 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-15 20:54 -0400
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-15 20:54 -0400
1import sys
2from pathlib import Path
3from enum import Enum
5import numpy as np
6import nmrglue as ng
7import pandas as pd
8import textwrap
9from rich import print
10from rich.console import Console
13from bokeh.palettes import Category20
14from scipy import ndimage
15from skimage.morphology import binary_closing, disk, footprint_rectangle
16from skimage.filters import threshold_otsu
17from pydantic import BaseModel
19from peakipy.utils import df_to_rich_table
20from peakipy.fitting import make_mask
22console = Console()
class StrucEl(str, Enum):
    """Structuring-element choices used when clustering peaks.

    square/disk/rectangle select the footprint for binary closing in
    ``Peaklist.clusters``; ``mask_method`` presumably selects the
    mask-overlap clustering (``Peaklist.mask_method``) instead —
    confirm against the CLI that consumes this enum.
    """

    square = "square"
    disk = "disk"
    rectangle = "rectangle"
    mask_method = "mask_method"
class PeaklistFormat(str, Enum):
    """Supported input peak-list formats.

    a2 = CCPN Analysis v2, a3 = CCPN Analysis v3 ("assign"), plus
    Sparky, NMRPipe, peakipy's own output and plain csv.
    """

    a2 = "a2"
    a3 = "a3"
    sparky = "sparky"
    pipe = "pipe"
    peakipy = "peakipy"
    csv = "csv"
class OutFmt(str, Enum):
    """Serialisation formats for saving fit results (csv or pickle)."""

    csv = "csv"
    pkl = "pkl"
class PeaklistColumns(BaseModel):
    """These are the columns required for performing fits in peakipy"""

    INDEX: int  # peak serial number
    X_AXIS: int  # X position in points (integer)
    Y_AXIS: int  # Y position in points (integer)
    X_AXISf: float  # X position in points (fractional)
    Y_AXISf: float  # Y position in points (fractional)
    X_PPM: float  # X position in ppm
    Y_PPM: float  # Y position in ppm
    XW: float  # X linewidth in points
    YW: float  # Y linewidth in points
    XW_HZ: float  # X linewidth in Hz
    YW_HZ: float  # Y linewidth in Hz
    HEIGHT: float  # peak height (may be a dummy 0.0 for sparky/csv input)
    VOL: float  # peak volume (may be a dummy 0.0 for sparky/csv input)
    ASS: str  # unique peak assignment label
    X_RADIUS: float  # X fit-mask radius in points
    Y_RADIUS: float  # Y fit-mask radius in points
    X_RADIUS_PPM: float  # X fit-mask radius in ppm
    Y_RADIUS_PPM: float  # Y fit-mask radius in ppm
    include: str  # "yes"/"no" flag controlling whether the peak is fit
class PeaklistColumnsWithClusters(PeaklistColumns):
    """Peaklist columns plus the clustering columns added by
    ``Peaklist.clusters``/``Peaklist.mask_method``."""

    CLUSTID: int  # cluster id shared by overlapping peaks
    MEMCNT: int  # number of peaks in this cluster
    color: str  # plot colour ("black" for singleton clusters)
class Pseudo3D:
    """Read dic, data from NMRGlue and dims from input to create a Pseudo3D dataset

    2D data is promoted to a single-plane 3D cube so that downstream code
    can always iterate over planes.

    :param dic: from nmrglue.pipe.read
    :type dic: dict

    :param data: data from nmrglue.pipe.read
    :type data: numpy.array

    :param dims: dimension order i.e [0,1,2] where 0 = planes, 1 = f1, 2 = f2
    :type dims: list
    """

    def __init__(self, dic, data, dims):
        # check dimensions via nmrglue's universal dictionary
        self._udic = ng.pipe.guess_udic(dic, data)
        self._ndim = self._udic["ndim"]

        if self._ndim == 1:
            # 1D data cannot be fit as 2D peaks
            err = """[red]
            ##########################################
            NMR Data should be either 2D or 3D
            ##########################################
            [/red]"""
            sys.exit(err)

        # check that spectrum has correct number of dims
        elif self._ndim != len(dims):
            err = f"""[red]
            #################################################################
            Your spectrum has {self._ndim} dimensions with shape {data.shape}
            but you have given a dimension order of {dims}...
            #################################################################
            [/red]"""
            sys.exit(err)

        elif (self._ndim == 2) and (len(dims) == 2):
            self._f1_dim, self._f2_dim = dims
            self._planes = 0
            self._uc_f1 = ng.pipe.make_uc(dic, data, dim=self._f1_dim)
            self._uc_f2 = ng.pipe.make_uc(dic, data, dim=self._f2_dim)
            # make data pseudo3d (one plane)
            self._data = data.reshape((1, data.shape[0], data.shape[1]))
            self._dims = [self._planes, self._f1_dim + 1, self._f2_dim + 1]

        else:
            self._planes, self._f1_dim, self._f2_dim = dims
            self._dims = dims
            self._data = data
            # make unit conversion dicts
            self._uc_f2 = ng.pipe.make_uc(dic, data, dim=self._f2_dim)
            self._uc_f1 = ng.pipe.make_uc(dic, data, dim=self._f1_dim)

        # rearrange data if dims not in standard order
        if self._dims != [0, 1, 2]:
            # BUGFIX: transpose the (possibly reshaped) pseudo-3D array, not
            # the raw input -- previously a reordered 2D input crashed here
            # because the still-2D `data` was transposed with three axes.
            self._data = np.transpose(self._data, self._dims)

        self._dic = dic

        self._f1_label = self._udic[self._f1_dim]["label"]
        self._f2_label = self._udic[self._f2_dim]["label"]

    @property
    def uc_f1(self):
        """Return unit conversion dict for F1"""
        return self._uc_f1

    @property
    def uc_f2(self):
        """Return unit conversion dict for F2"""
        return self._uc_f2

    @property
    def dims(self):
        """Return dimension order"""
        return self._dims

    @property
    def data(self):
        """Return array containing data"""
        return self._data

    @data.setter
    def data(self, data):
        self._data = data

    @property
    def dic(self):
        """Return the NMRPipe header dictionary."""
        return self._dic

    @property
    def udic(self):
        """Return nmrglue's universal dictionary for this spectrum."""
        return self._udic

    @property
    def ndim(self):
        """Return the number of dimensions of the raw input data."""
        return self._ndim

    @property
    def f1_label(self):
        """Return nucleus label for F1 (e.g. from the universal dict)."""
        return self._f1_label

    @property
    def f2_label(self):
        """Return nucleus label for F2 (e.g. from the universal dict)."""
        return self._f2_label

    @property
    def planes(self):
        """Return the axis index of the plane dimension."""
        return self.dims[0]

    @property
    def n_planes(self):
        """Return the number of planes in the pseudo-3D cube."""
        return self.data.shape[self.planes]

    @property
    def f1(self):
        """Return the axis index of F1."""
        return self.dims[1]

    @property
    def f2(self):
        """Return the axis index of F2."""
        return self.dims[2]

    # size of f1 and f2 in points
    @property
    def f2_size(self):
        """Return size of f2 dimension in points"""
        return self._udic[self._f2_dim]["size"]

    @property
    def f1_size(self):
        """Return size of f1 dimension in points"""
        return self._udic[self._f1_dim]["size"]

    # points per ppm (size / sweep width in ppm; sw[Hz] / obs[MHz] = sw in ppm)
    @property
    def pt_per_ppm_f1(self):
        return self.f1_size / (
            self._udic[self._f1_dim]["sw"] / self._udic[self._f1_dim]["obs"]
        )

    @property
    def pt_per_ppm_f2(self):
        return self.f2_size / (
            self._udic[self._f2_dim]["sw"] / self._udic[self._f2_dim]["obs"]
        )

    # points per hz
    @property
    def pt_per_hz_f1(self):
        return self.f1_size / self._udic[self._f1_dim]["sw"]

    @property
    def pt_per_hz_f2(self):
        return self.f2_size / self._udic[self._f2_dim]["sw"]

    # hz per point
    @property
    def hz_per_pt_f1(self):
        return 1.0 / self.pt_per_hz_f1

    @property
    def hz_per_pt_f2(self):
        return 1.0 / self.pt_per_hz_f2

    # ppm per point
    @property
    def ppm_per_pt_f1(self):
        return 1.0 / self.pt_per_ppm_f1

    @property
    def ppm_per_pt_f2(self):
        return 1.0 / self.pt_per_ppm_f2

    # get ppm limits for ppm scales
    @property
    def f2_ppm_scale(self):
        return self.uc_f2.ppm_scale()

    @property
    def f1_ppm_scale(self):
        return self.uc_f1.ppm_scale()

    @property
    def f2_ppm_limits(self):
        return self.uc_f2.ppm_limits()

    @property
    def f1_ppm_limits(self):
        return self.uc_f1.ppm_limits()

    @property
    def f1_ppm_max(self):
        return max(self.f1_ppm_limits)

    @property
    def f1_ppm_min(self):
        return min(self.f1_ppm_limits)

    @property
    def f2_ppm_max(self):
        return max(self.f2_ppm_limits)

    @property
    def f2_ppm_min(self):
        return min(self.f2_ppm_limits)

    @property
    def f2_ppm_0(self):
        return self.f2_ppm_limits[0]

    @property
    def f2_ppm_1(self):
        return self.f2_ppm_limits[1]

    @property
    def f1_ppm_0(self):
        return self.f1_ppm_limits[0]

    @property
    def f1_ppm_1(self):
        return self.f1_ppm_limits[1]
class UnknownFormat(Exception):
    """Raised when a peak list format is not one of the supported
    ``PeaklistFormat`` values."""

    pass
310class Peaklist(Pseudo3D):
311 """Read analysis, sparky or NMRPipe peak list and convert to NMRPipe-ish format also find peak clusters
313 Parameters
314 ----------
315 path : path-like or str
316 path to peaklist
317 data_path : ndarray
318 NMRPipe format data
319 fmt : str
320 a2|a3|sparky|pipe
321 dims: list
322 [planes,y,x]
323 radii: list
324 [x,y] Mask radii in ppm
327 Methods
328 -------
330 clusters :
331 mask_method :
332 adaptive_clusters :
334 Returns
335 -------
336 df : pandas DataFrame
337 dataframe containing peaklist
339 """
341 def __init__(
342 self,
343 path,
344 data_path,
345 fmt: PeaklistFormat = PeaklistFormat.a2,
346 dims=[0, 1, 2],
347 radii=[0.04, 0.4],
348 posF1="Position F2",
349 posF2="Position F1",
350 verbose=False,
351 ):
352 dic, data = ng.pipe.read(data_path)
353 Pseudo3D.__init__(self, dic, data, dims)
354 self.fmt = fmt
355 self.peaklist_path = path
356 self.data_path = data_path
357 self.verbose = verbose
358 self._radii = radii
359 self._thres = None
360 if self.verbose:
361 print(
362 "Points per hz f1 = %.3f, f2 = %.3f"
363 % (self.pt_per_hz_f1, self.pt_per_hz_f2)
364 )
366 self._analysis_to_pipe_dic = {
367 "#": "INDEX",
368 "Position F1": "X_PPM",
369 "Position F2": "Y_PPM",
370 "Line Width F1 (Hz)": "XW_HZ",
371 "Line Width F2 (Hz)": "YW_HZ",
372 "Height": "HEIGHT",
373 "Volume": "VOL",
374 }
375 self._assign_to_pipe_dic = {
376 "#": "INDEX",
377 "Pos F1": "X_PPM",
378 "Pos F2": "Y_PPM",
379 "LW F1 (Hz)": "XW_HZ",
380 "LW F2 (Hz)": "YW_HZ",
381 "Height": "HEIGHT",
382 "Volume": "VOL",
383 }
385 self._sparky_to_pipe_dic = {
386 "index": "INDEX",
387 "w1": "X_PPM",
388 "w2": "Y_PPM",
389 "lw1 (hz)": "XW_HZ",
390 "lw2 (hz)": "YW_HZ",
391 "Height": "HEIGHT",
392 "Volume": "VOL",
393 "Assignment": "ASS",
394 }
396 self._analysis_to_pipe_dic[posF1] = "Y_PPM"
397 self._analysis_to_pipe_dic[posF2] = "X_PPM"
399 self._df = self.read_peaklist()
401 def read_peaklist(self):
402 match self.fmt:
403 case self.fmt.a2:
404 self._df = self._read_analysis()
406 case self.fmt.a3:
407 self._df = self._read_assign()
409 case self.fmt.sparky:
410 self._df = self._read_sparky()
412 case self.fmt.pipe:
413 self._df = self._read_pipe()
415 case self.fmt.csv:
416 self._df = self._read_csv()
418 case _:
419 raise UnknownFormat("I don't know this format: {self.fmt}")
421 return self._df
423 @property
424 def df(self):
425 return self._df
427 @df.setter
428 def df(self, df):
429 self._df = df
430 return self._df
432 @property
433 def radii(self):
434 return self._radii
436 def check_radius_contains_enough_points_for_fitting(self, radius, pt_per_ppm, flag):
437 if (radius * pt_per_ppm) < 2.0:
438 new_radius = 2.0 * (1./ pt_per_ppm)
439 print(
440 "\n",
441 f"[red]Warning: {flag} is set to {radius:.3f} ppm which is {radius * pt_per_ppm:.3f} points[/red]" + "\n",
442 f"[yellow]Setting to 2 points which is {new_radius:.3f} ppm[/yellow]" + "\n",
443 f"[yellow]Consider increasing this value to improve robustness of fitting (or increase zero filling)[/yellow]" + "\n",
444 )
445 else:
446 new_radius = radius
447 return new_radius
449 @property
450 def f2_radius(self):
451 """radius for fitting mask in f2"""
452 _f2_radius = self.check_radius_contains_enough_points_for_fitting(self.radii[0], self.pt_per_ppm_f2, "--x-radius-ppm")
453 return _f2_radius
455 @property
456 def f1_radius(self):
457 """radius for fitting mask in f1"""
458 _f1_radius = self.check_radius_contains_enough_points_for_fitting(self.radii[1], self.pt_per_ppm_f1, "--y-radius-ppm")
459 return _f1_radius
461 @property
462 def analysis_to_pipe_dic(self):
463 return self._analysis_to_pipe_dic
465 @property
466 def assign_to_pipe_dic(self):
467 return self._assign_to_pipe_dic
469 @property
470 def sparky_to_pipe_dic(self):
471 return self._sparky_to_pipe_dic
473 @property
474 def thres(self):
475 if self._thres == None:
476 self._thres = abs(threshold_otsu(self.data[0]))
477 return self._thres
478 else:
479 return self._thres
481 def validate_peaklist(self):
482 self.df = pd.DataFrame(
483 [
484 PeaklistColumns(**i).model_dump()
485 for i in self.df.to_dict(orient="records")
486 ]
487 )
488 return self.df
490 def update_df(self):
491 # int point value
492 self.df["X_AXIS"] = self.df.X_PPM.apply(lambda x: self.uc_f2(x, "ppm"))
493 self.df["Y_AXIS"] = self.df.Y_PPM.apply(lambda x: self.uc_f1(x, "ppm"))
494 # decimal point value
495 self.df["X_AXISf"] = self.df.X_PPM.apply(lambda x: self.uc_f2.f(x, "ppm"))
496 self.df["Y_AXISf"] = self.df.Y_PPM.apply(lambda x: self.uc_f1.f(x, "ppm"))
497 # in case of missing values (should estimate though)
498 self.df["XW_HZ"] = self.df.XW_HZ.replace("None", "20.0")
499 self.df["YW_HZ"] = self.df.YW_HZ.replace("None", "20.0")
500 self.df["XW_HZ"] = self.df.XW_HZ.replace(np.nan, "20.0")
501 self.df["YW_HZ"] = self.df.YW_HZ.replace(np.nan, "20.0")
502 # convert linewidths to float
503 self.df["XW_HZ"] = self.df.XW_HZ.apply(lambda x: float(x))
504 self.df["YW_HZ"] = self.df.YW_HZ.apply(lambda x: float(x))
505 # convert Hz lw to points
506 self.df["XW"] = self.df.XW_HZ.apply(lambda x: x * self.pt_per_hz_f2)
507 self.df["YW"] = self.df.YW_HZ.apply(lambda x: x * self.pt_per_hz_f1)
508 # makes an assignment column from Assign F1 and Assign F2 columns
509 # in analysis2.x and ccpnmr v3 assign peak lists
510 if self.fmt in [PeaklistFormat.a2, PeaklistFormat.a3]:
511 self.df["ASS"] = self.df.apply(
512 # lambda i: "".join([i["Assign F1"], i["Assign F2"]]), axis=1
513 lambda i: f"{i['Assign F1']}_{i['Assign F2']}",
514 axis=1,
515 )
517 # make default values for X and Y radii for fit masks
518 self.df["X_RADIUS_PPM"] = np.zeros(len(self.df)) + self.f2_radius
519 self.df["Y_RADIUS_PPM"] = np.zeros(len(self.df)) + self.f1_radius
520 self.df["X_RADIUS"] = self.df.X_RADIUS_PPM.apply(
521 lambda x: x * self.pt_per_ppm_f2
522 )
523 self.df["Y_RADIUS"] = self.df.Y_RADIUS_PPM.apply(
524 lambda x: x * self.pt_per_ppm_f1
525 )
526 # add include column
527 if "include" in self.df.columns:
528 pass
529 else:
530 self.df["include"] = self.df.apply(lambda x: "yes", axis=1)
532 # check assignments for duplicates
533 self.check_assignments()
534 # check that peaks are within the bounds of the data
535 self.check_peak_bounds()
536 self.validate_peaklist()
538 def add_fix_bound_columns(self):
539 """add columns containing parameter bounds (param_upper/param_lower)
540 and whether or not parameter should be fixed (yes/no)
542 For parameter bounding:
544 Column names are <param_name>_upper and <param_name>_lower for upper and lower bounds respectively.
545 Values are given as floating point. Value of 0.0 indicates that parameter is unbounded
546 X/Y positions are given in ppm
547 Linewidths are given in Hz
549 For parameter fixing:
551 Column names are <param_name>_fix.
552 Values are given as a string 'yes' or 'no'
554 """
555 pass
557 def _read_analysis(self):
558 df = pd.read_csv(self.peaklist_path, delimiter="\t")
559 new_columns = [self.analysis_to_pipe_dic.get(i, i) for i in df.columns]
560 pipe_columns = dict(zip(df.columns, new_columns))
561 df = df.rename(index=str, columns=pipe_columns)
563 return df
565 def _read_assign(self):
566 df = pd.read_csv(self.peaklist_path, delimiter="\t")
567 new_columns = [self.assign_to_pipe_dic.get(i, i) for i in df.columns]
568 pipe_columns = dict(zip(df.columns, new_columns))
569 df = df.rename(index=str, columns=pipe_columns)
571 return df
573 def _read_sparky(self):
574 df = pd.read_csv(
575 self.peaklist_path,
576 skiprows=1,
577 sep=r"\s+",
578 names=["ASS", "Y_PPM", "X_PPM"],
579 # use only first three columns
580 usecols=[i for i in range(3)],
581 )
582 df["INDEX"] = df.index
583 # need to add LW estimate
584 df["XW_HZ"] = 20.0
585 df["YW_HZ"] = 20.0
586 # dummy values
587 df["HEIGHT"] = 0.0
588 df["VOL"] = 0.0
589 return df
591 def _read_pipe(self):
592 to_skip = 0
593 with open(self.peaklist_path) as f:
594 lines = f.readlines()
595 for line in lines:
596 if line.startswith("VARS"):
597 columns = line.strip().split()[1:]
598 elif line[:5].strip(" ").isdigit():
599 break
600 else:
601 to_skip += 1
602 df = pd.read_csv(
603 self.peaklist_path, skiprows=to_skip, names=columns, sep=r"\s+"
604 )
605 return df
607 def _read_csv(self):
608 """ Read a csv file containing peaklist data
610 Requires the following columns:
611 X_PPM: ppm position of peak in X axis
612 Y_PPM: ppm position of peak in Y axis
613 ASS: assignment of peak
614 Optional columns include:
615 XW_HZ: estimated X axis linewidth in HZ
616 YW_HZ: estimated Y axis linewidth in HZ
617 VOL: peak volume
618 Height: peak height
619 """
620 df = pd.read_csv(self.peaklist_path)
621 df["INDEX"] = df.index
622 # need to add LW estimate
623 if not "XW_HZ" in df.columns:
624 df["XW_HZ"] = 20.0
625 if not "YW_HZ" in df.columns:
626 df["YW_HZ"] = 20.0
627 # dummy values
628 if not "HEIGHT" in df.columns:
629 df["HEIGHT"] = 0.0
630 if not "VOL" in df.columns:
631 df["VOL"] = 0.0
632 return df
634 def check_assignments(self):
635 # self.df["ASS"] = self.df.
636 self.df["ASS"] = self.df.ASS.astype(object)
637 self.df.loc[self.df["ASS"].isnull(), "ASS"] = "None_dummy_0"
638 self.df["ASS"] = self.df.ASS.astype(str)
639 duplicates_bool = self.df.ASS.duplicated()
640 duplicates = self.df.ASS[duplicates_bool]
641 if len(duplicates) > 0:
642 console.print(
643 textwrap.dedent(
644 """
645 #############################################################################
646 You have duplicated assignments in your list...
647 Currently each peak needs a unique assignment. Sorry about that buddy...
648 #############################################################################
649 """
650 ),
651 style="yellow",
652 )
653 self.df.loc[duplicates_bool, "ASS"] = [
654 f"{i}_dummy_{num+1}" for num, i in enumerate(duplicates)
655 ]
656 if self.verbose:
657 print("Here are the duplicates")
658 print(duplicates)
659 print(self.df.ASS)
661 print(
662 textwrap.dedent(
663 """
664 Creating dummy assignments for duplicates
666 """
667 )
668 )
670 def check_peak_bounds(self):
671 columns_to_print = ["INDEX", "ASS", "X_AXIS", "Y_AXIS", "X_PPM", "Y_PPM"]
672 # check that peaks are within the bounds of spectrum
673 within_x = (self.df.X_PPM < self.f2_ppm_max) & (self.df.X_PPM > self.f2_ppm_min)
674 within_y = (self.df.Y_PPM < self.f1_ppm_max) & (self.df.Y_PPM > self.f1_ppm_min)
675 self.excluded = self.df[~(within_x & within_y)]
676 self.df = self.df[within_x & within_y]
677 if len(self.excluded) > 0:
678 print(
679 textwrap.dedent(
680 f"""[red]
681 #################################################################################
683 Excluding the following peaks as they are not within the spectrum which has shape
685 {self.data.shape}
686 [/red]"""
687 )
688 )
689 table_to_print = df_to_rich_table(
690 self.excluded,
691 title="Excluded",
692 columns=columns_to_print,
693 styles=["red" for i in columns_to_print],
694 )
695 print(table_to_print)
696 print(
697 "[red]#################################################################################[/red]"
698 )
700 def clusters(
701 self,
702 thres=None,
703 struc_el: StrucEl = StrucEl.disk,
704 struc_size=(3,),
705 l_struc=None,
706 ):
707 """Find clusters of peaks
709 :param thres: threshold for positive signals above which clusters are selected. If None then threshold_otsu is used
710 :type thres: float
712 :param struc_el: 'square'|'disk'|'rectangle'
713 structuring element for binary_closing of thresholded data can be square, disc or rectangle
714 :type struc_el: str
716 :param struc_size: size/dimensions of structuring element
717 for square and disk first element of tuple is used (for disk value corresponds to radius)
718 for rectangle, tuple corresponds to (width,height).
719 :type struc_size: tuple
722 """
723 peaks = [[y, x] for y, x in zip(self.df.Y_AXIS, self.df.X_AXIS)]
725 if thres == None:
726 thres = self.thres
727 self._thres = abs(threshold_otsu(self.data[0]))
728 else:
729 self._thres = thres
731 # get positive and negative
732 thresh_data = np.bitwise_or(
733 self.data[0] < (self._thres * -1.0), self.data[0] > self._thres
734 )
736 match struc_el:
737 case struc_el.disk:
738 radius = struc_size[0]
739 if self.verbose:
740 print(f"using disk with {radius}")
741 closed_data = binary_closing(thresh_data, disk(int(radius)))
743 case struc_el.square:
744 width = struc_size[0]
745 if self.verbose:
746 print(f"using square with {width}")
747 closed_data = binary_closing(thresh_data, footprint_rectangle((int(width),int(width))))
749 case struc_el.rectangle:
750 width, height = struc_size
751 if self.verbose:
752 print(f"using rectangle with {width} and {height}")
753 closed_data = binary_closing(
754 thresh_data, footprint_rectangle((int(width), int(height)))
755 )
757 case _:
758 if self.verbose:
759 print(f"Not using any closing function")
760 closed_data = thresh_data
762 labeled_array, num_features = ndimage.label(closed_data, l_struc)
764 self.df.loc[:, "CLUSTID"] = [labeled_array[i[0], i[1]] for i in peaks]
766 # renumber "0" clusters
767 max_clustid = self.df["CLUSTID"].max()
768 n_of_zeros = len(self.df[self.df["CLUSTID"] == 0]["CLUSTID"])
769 self.df.loc[self.df[self.df["CLUSTID"] == 0].index, "CLUSTID"] = np.arange(
770 max_clustid + 1, n_of_zeros + max_clustid + 1, dtype=int
771 )
773 # count how many peaks per cluster
774 for ind, group in self.df.groupby("CLUSTID"):
775 self.df.loc[group.index, "MEMCNT"] = len(group)
777 self.df.loc[:, "color"] = self.df.apply(
778 lambda x: Category20[20][int(x.CLUSTID) % 20] if x.MEMCNT > 1 else "black",
779 axis=1,
780 )
781 return ClustersResult(labeled_array, num_features, closed_data, peaks)
783 def mask_method(self, overlap=1.0, l_struc=None):
784 """connect clusters based on overlap of fitting masks
786 :param overlap: fraction of mask for which overlaps are calculated
787 :type overlap: float
789 :returns ClusterResult: Instance of ClusterResult
790 :rtype: ClustersResult
791 """
792 # overlap is positive
793 overlap = abs(overlap)
795 self._thres = threshold_otsu(self.data[0])
797 mask = np.zeros(self.data[0].shape, dtype=bool)
799 for ind, peak in self.df.iterrows():
800 mask += make_mask(
801 self.data[0],
802 peak.X_AXISf,
803 peak.Y_AXISf,
804 peak.X_RADIUS * overlap,
805 peak.Y_RADIUS * overlap,
806 )
808 peaks = [[y, x] for y, x in zip(self.df.Y_AXIS, self.df.X_AXIS)]
809 labeled_array, num_features = ndimage.label(mask, l_struc)
811 self.df.loc[:, "CLUSTID"] = [labeled_array[i[0], i[1]] for i in peaks]
813 # renumber "0" clusters
814 max_clustid = self.df["CLUSTID"].max()
815 n_of_zeros = len(self.df[self.df["CLUSTID"] == 0]["CLUSTID"])
816 self.df.loc[self.df[self.df["CLUSTID"] == 0].index, "CLUSTID"] = np.arange(
817 max_clustid + 1, n_of_zeros + max_clustid + 1, dtype=int
818 )
820 # count how many peaks per cluster
821 for ind, group in self.df.groupby("CLUSTID"):
822 self.df.loc[group.index, "MEMCNT"] = len(group)
824 self.df.loc[:, "color"] = self.df.apply(
825 lambda x: Category20[20][int(x.CLUSTID) % 20] if x.MEMCNT > 1 else "black",
826 axis=1,
827 )
829 return ClustersResult(labeled_array, num_features, mask, peaks)
831 def to_fuda(self):
832 fname = self.peaklist_path.parent / "params.fuda"
833 with open(self.peaklist_path.parent / "peaks.fuda", "w") as peaks_fuda:
834 for ass, f1_ppm, f2_ppm in zip(self.df.ASS, self.df.Y_PPM, self.df.X_PPM):
835 peaks_fuda.write(f"{ass}\t{f1_ppm:.3f}\t{f2_ppm:.3f}\n")
836 groups = self.df.groupby("CLUSTID")
837 fuda_params = Path(fname)
838 overlap_peaks = ""
840 for ind, group in groups:
841 if len(group) > 1:
842 overlap_peaks_str = ";".join(group.ASS)
843 overlap_peaks += f"OVERLAP_PEAKS=({overlap_peaks_str})\n"
845 fuda_file = textwrap.dedent(
846 f"""\
848# Read peaklist and spectrum info
849PEAKLIST=peaks.fuda
850SPECFILE={self.data_path}
851PARAMETERFILE=(bruker;vclist)
852ZCORR=ncyc
853NOISE={self.thres} # you'll need to adjust this
854BASELINE=N
855VERBOSELEVEL=5
856PRINTDATA=Y
857LM=(MAXFEV=250;TOL=1e-5)
858#Specify the default values. All values are in ppm:
859DEF_LINEWIDTH_F1={self.f1_radius}
860DEF_LINEWIDTH_F2={self.f2_radius}
861DEF_RADIUS_F1={self.f1_radius}
862DEF_RADIUS_F2={self.f2_radius}
863SHAPE=GLORE
864# OVERLAP PEAKS
865{overlap_peaks}"""
866 )
867 with open(fuda_params, "w") as f:
868 print(f"Writing FuDA file {fuda_file}")
869 f.write(fuda_file)
870 if self.verbose:
871 print(overlap_peaks)
class ClustersResult:
    """Read-only container for the outputs of peak clustering.

    Bundles the labelled cluster array, the number of clusters found, the
    thresholded (optionally morphologically closed) data and the peak
    coordinates that were used for the clustering.
    """

    def __init__(self, labeled_array, num_features, closed_data, peaks):
        # stash everything privately; read access goes through properties
        self._labeled_array = labeled_array
        self._num_features = num_features
        self._closed_data = closed_data
        self._peaks = peaks

    @property
    def labeled_array(self):
        """Array labelling each point with its cluster id."""
        return self._labeled_array

    @property
    def num_features(self):
        """Number of clusters found by ndimage.label."""
        return self._num_features

    @property
    def closed_data(self):
        """Boolean thresholded data (after any binary closing)."""
        return self._closed_data

    @property
    def peaks(self):
        """List of [y, x] peak positions in points."""
        return self._peaks
class LoadData(Peaklist):
    """Load peaklist data from peakipy .csv file output from either peakipy read or edit

    read_peaklist is redefined to just read a .csv file

    check_data_frame makes sure data frame is in good shape for setting up fits

    """

    def read_peaklist(self):
        # format is chosen by file suffix: csv, tab-separated, or pickled DataFrame
        if self.peaklist_path.suffix == ".csv":
            self.df = pd.read_csv(self.peaklist_path)  # , comment="#")

        elif self.peaklist_path.suffix == ".tab":
            self.df = pd.read_csv(self.peaklist_path, sep="\t")  # comment="#")

        else:
            self.df = pd.read_pickle(self.peaklist_path)

        # recompute the noise threshold from the first plane
        self._thres = threshold_otsu(self.data[0])

        return self.df

    def validate_peaklist(self):
        # validate against the cluster-aware model: peakipy output already
        # carries CLUSTID/MEMCNT/color, unlike fresh peak lists
        self.df = pd.DataFrame(
            [
                PeaklistColumnsWithClusters(**i).model_dump()
                for i in self.df.to_dict(orient="records")
            ]
        )
        return self.df

    def check_data_frame(self):
        """
        Ensure the data frame has all required columns and add necessary derived columns for fitting.

        Returns
        -------
        pd.DataFrame
            The modified DataFrame after validation.
        """  # make diameter columns
        if "X_DIAMETER_PPM" in self.df.columns:
            pass
        else:
            self.df["X_DIAMETER_PPM"] = self.df["X_RADIUS_PPM"] * 2.0
            self.df["Y_DIAMETER_PPM"] = self.df["Y_RADIUS_PPM"] * 2.0

        # make a column to track edited peaks
        if "Edited" in self.df.columns:
            pass
        else:
            self.df["Edited"] = np.zeros(len(self.df), dtype=bool)

        # create include column if it doesn't exist
        if "include" in self.df.columns:
            pass
        else:
            self.df["include"] = self.df.apply(lambda _: "yes", axis=1)

        # color clusters (singleton clusters are plotted black)
        self.df["color"] = self.df.apply(
            lambda x: Category20[20][int(x.CLUSTID) % 20] if x.MEMCNT > 1 else "black",
            axis=1,
        )

        # get rid of unnamed columns (index columns from round-tripped csv)
        unnamed_cols = [i for i in self.df.columns if "Unnamed:" in i]
        self.df = self.df.drop(columns=unnamed_cols)

    def update_df(self):
        """Slightly modified to retain previous configurations"""
        # int point value
        self.df["X_AXIS"] = self.df.X_PPM.apply(lambda x: self.uc_f2(x, "ppm"))
        self.df["Y_AXIS"] = self.df.Y_PPM.apply(lambda x: self.uc_f1(x, "ppm"))
        # decimal point value
        self.df["X_AXISf"] = self.df.X_PPM.apply(lambda x: self.uc_f2.f(x, "ppm"))
        self.df["Y_AXISf"] = self.df.Y_PPM.apply(lambda x: self.uc_f1.f(x, "ppm"))
        # in case of missing values (should estimate though)
        self.df["XW_HZ"] = self.df.XW_HZ.replace(np.nan, "20.0")
        self.df["YW_HZ"] = self.df.YW_HZ.replace(np.nan, "20.0")
        # convert linewidths to float
        self.df["XW_HZ"] = self.df.XW_HZ.apply(lambda x: float(x))
        self.df["YW_HZ"] = self.df.YW_HZ.apply(lambda x: float(x))
        # convert Hz lw to points
        self.df["XW"] = self.df.XW_HZ.apply(lambda x: x * self.pt_per_hz_f2)
        self.df["YW"] = self.df.YW_HZ.apply(lambda x: x * self.pt_per_hz_f1)
        # makes an assignment column
        # NOTE(review): joins without the "_" separator that Peaklist.update_df
        # uses -- looks inconsistent; confirm whether intentional before changing
        if self.fmt == "a2":
            self.df["ASS"] = self.df.apply(
                lambda i: "".join([i["Assign F1"], i["Assign F2"]]), axis=1
            )

        # make default values for X and Y radii for fit masks
        # (deliberately NOT overwritten here so edited radii are retained)
        # self.df["X_RADIUS_PPM"] = np.zeros(len(self.df)) + self.f2_radius
        # self.df["Y_RADIUS_PPM"] = np.zeros(len(self.df)) + self.f1_radius
        self.df["X_RADIUS"] = self.df.X_RADIUS_PPM.apply(
            lambda x: x * self.pt_per_ppm_f2
        )
        self.df["Y_RADIUS"] = self.df.Y_RADIUS_PPM.apply(
            lambda x: x * self.pt_per_ppm_f1
        )
        # add include column
        if "include" in self.df.columns:
            pass
        else:
            self.df["include"] = self.df.apply(lambda x: "yes", axis=1)

        # check assignments for duplicates
        self.check_assignments()
        # check that peaks are within the bounds of the data
        self.check_peak_bounds()
        self.validate_peaklist()
def get_vclist(vclist, args):
    """Attach variable-counter list (vclist) information to the args dict.

    :param vclist: path to a vclist file, or None if not supplied
    :param args: dict of fit arguments, mutated in place
    :returns: args with "vclist" (bool) and, when a file was read,
        "vclist_data" (numpy array) set
    :raises Exception: when the given vclist path does not exist
    """
    # no vclist supplied -> just record its absence
    if vclist is None:
        args["vclist"] = False
        return args

    if not vclist.exists():
        raise Exception("vclist not found...")

    # read the delay/counter values and flag their presence
    args["vclist_data"] = np.genfromtxt(vclist)
    args["vclist"] = True
    return args