Coverage for peakipy/utils.py: 99%

123 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-09-14 14:49 -0400

1import sys 

2import json 

3from datetime import datetime 

4from pathlib import Path 

5from typing import List 

6 

7from rich import print 

8from rich.table import Table 

9 

# for printing dataframes
peaklist_columns_for_printing = ["INDEX", "ASS", "X_PPM", "Y_PPM", "CLUSTID", "MEMCNT"]
# column subset used when reporting problematic ("bad") fit results
bad_column_selection = [
    "clustid",
    "amp",
    "center_x_ppm",
    "center_y_ppm",
    "fwhm_x_hz",
    "fwhm_y_hz",
    "lineshape",
]
# one style per entry of bad_column_selection, paired positionally (both lists
# have 7 entries); the repeated "yellow"/"red" values appear intentional —
# NOTE(review): confirm the pairing against the call sites
bad_color_selection = [
    "green",
    "blue",
    "yellow",
    "red",
    "yellow",
    "red",
    "magenta",
]

30 

31 

def mkdir_tmp_dir(base_path: Path = Path("./")):
    """Create (if needed) a ``tmp`` directory under *base_path* and return it.

    Parameters
    ----------
    base_path : Path
        directory in which the ``tmp`` directory is created

    Returns
    -------
    Path
        path of the (possibly pre-existing) ``tmp`` directory
    """
    tmp_dir = base_path / "tmp"
    # parents=True: do not fail with FileNotFoundError when base_path itself
    # does not exist yet; exist_ok keeps repeated calls idempotent
    tmp_dir.mkdir(parents=True, exist_ok=True)
    return tmp_dir

36 

37 

def create_log_path(base_path: Path = Path("./")):
    """Return the path of the run-log file inside *base_path*."""
    return base_path.joinpath("run_log.txt")

40 

41 

def run_log(log_name="run_log.txt"):
    """Append a timestamped record of the current command line to *log_name*.

    Parameters
    ----------
    log_name : str or Path
        file the log entry is appended to
    """
    # copy sys.argv: the original code aliased it and rewrote sys.argv[0],
    # mutating the interpreter-wide argument list as a side effect
    sys_argv = list(sys.argv)
    # keep just the script's basename, not its full path
    sys_argv[0] = Path(sys_argv[0]).name
    run_args = " ".join(sys_argv)
    time_stamp = datetime.now().strftime("%A %d %B %Y at %H:%M")
    with open(log_name, "a") as log:
        log.write(f"# Script run on {time_stamp}:\n{run_args}\n")

51 

52 

53def df_to_rich_table(df, title: str, columns: List[str], styles: str): 

54 """Print dataframe using rich library 

55 

56 Parameters 

57 ---------- 

58 df : pandas.DataFrame 

59 title : str 

60 title of table 

61 columns : List[str] 

62 list of column names (must be in df) 

63 styles : List[str] 

64 list of styles in same order as columns 

65 """ 

66 table = Table(title=title) 

67 for col, style in zip(columns, styles): 

68 table.add_column(col, style=style) 

69 for _, row in df.iterrows(): 

70 row = row[columns].values 

71 str_row = [] 

72 for i in row: 

73 match i: 

74 case str(): 

75 str_row.append(f"{i}") 

76 case float() if i > 1e5: 

77 str_row.append(f"{i:.1e}") 

78 case float(): 

79 str_row.append(f"{i:.3f}") 

80 case bool(): 

81 str_row.append(f"{i}") 

82 case int(): 

83 str_row.append(f"{i}") 

84 table.add_row(*str_row) 

85 return table 

86 

87 

def load_config(config_path):
    """Load the JSON config at *config_path*; return ``{}`` when it is absent."""
    if not config_path.exists():
        return {}
    with config_path.open() as opened_config:
        return json.load(opened_config)

95 

96 

def write_config(config_path, config_dic):
    """Serialize *config_dic* as sorted, indented JSON to *config_path*."""
    with open(config_path, "w") as config:
        json.dump(config_dic, config, sort_keys=True, indent=4)

100 

101 

def update_config_file(config_path, config_kvs):
    """Merge *config_kvs* into the config at *config_path* and write it back."""
    merged = load_config(config_path)
    merged.update(config_kvs)
    write_config(config_path, merged)
    return merged

107 

108 

def update_args_with_values_from_config_file(args, config_path="peakipy.config"):
    """read a peakipy config file, extract params and update args dict

    :param args: dict containing params extracted from docopt command line
    :type args: dict
    :param config_path: path to peakipy config file [default: peakipy.config]
    :type config_path: str

    :returns args: updated args dict
    :rtype args: dict
    :returns config: dict that resulted from reading config file
    :rtype config: dict

    """
    # update args with values from peakipy.config file
    config_path = Path(config_path)
    if config_path.exists():
        try:
            config = load_config(config_path)
            print(
                f"[green]Using config file with dims [yellow]{config.get('dims')}[/yellow][/green]"
            )
            args["dims"] = config.get("dims", (0, 1, 2))
            # noise stays None when the key is absent (and 0/"" are treated
            # as "not set" by the truthiness check below)
            noise = config.get("noise")
            if noise:
                noise = float(noise)

            colors = config.get("colors", ["#5e3c99", "#e66101"])
        except json.decoder.JSONDecodeError:
            # corrupt JSON: warn and fall back to command-line values,
            # leaving args["dims"] untouched
            print(
                "[red]Your peakipy.config file is corrupted - maybe your JSON is not correct...[/red]"
            )
            print("[red]Not using[/red]")
            noise = False
            colors = args.get("colors", ("#5e3c99", "#e66101"))
            config = {}
    else:
        # no config file at all: same fallback as the corrupt-JSON case
        print(
            "[red]No peakipy.config found - maybe you need to generate one with peakipy read or see docs[/red]"
        )
        noise = False
        colors = args.get("colors", ("#5e3c99", "#e66101"))
        config = {}

    # noise and colors are always written back, whichever branch ran
    args["noise"] = noise
    args["colors"] = colors

    return args, config

157 

158 

def update_linewidths_from_hz_to_points(peakipy_data):
    """Convert XW_HZ/YW_HZ linewidths to points (in case they were adjusted when running edit.py)."""
    df = peakipy_data.df
    df["XW"] = df["XW_HZ"] * peakipy_data.pt_per_hz_f2
    df["YW"] = df["YW_HZ"] * peakipy_data.pt_per_hz_f1
    return peakipy_data

164 

165 

def update_peak_positions_from_ppm_to_points(peakipy_data):
    """Convert peak positions from ppm to points in case they were adjusted
    running edit.py; fills both the integer (X_AXIS/Y_AXIS) and fractional
    (X_AXISf/Y_AXISf) point columns."""
    df = peakipy_data.df
    uc_f1 = peakipy_data.uc_f1
    uc_f2 = peakipy_data.uc_f2
    df["X_AXIS"] = df.X_PPM.apply(lambda ppm: uc_f2(ppm, "PPM"))
    df["Y_AXIS"] = df.Y_PPM.apply(lambda ppm: uc_f1(ppm, "PPM"))
    df["X_AXISf"] = df.X_PPM.apply(lambda ppm: uc_f2.f(ppm, "PPM"))
    df["Y_AXISf"] = df.Y_PPM.apply(lambda ppm: uc_f1.f(ppm, "PPM"))
    return peakipy_data

181 

182 

183def save_data(df, output_name): 

184 suffix = output_name.suffix 

185 

186 if suffix == ".csv": 

187 df.to_csv(output_name, float_format="%.4f", index=False) 

188 

189 elif suffix == ".tab": 

190 df.to_csv(output_name, sep="\t", float_format="%.4f", index=False) 

191 

192 else: 

193 df.to_pickle(output_name) 

194 

195 

def check_data_shape_is_consistent_with_dims(peakipy_data):
    """Abort the program when the number of dims does not match the data rank.

    Parameters
    ----------
    peakipy_data : object
        must expose ``dims`` (sequence) and ``data`` (array with ``.shape``)

    Raises
    ------
    SystemExit
        when ``len(dims) != len(data.shape)``
    """
    if len(peakipy_data.dims) != len(peakipy_data.data.shape):
        print(
            f"Dims are {peakipy_data.dims} while data shape is {peakipy_data.data.shape}?"
        )
        # sys.exit instead of the bare exit() builtin: exit() is a `site`
        # convenience name and is not guaranteed to exist (e.g. under -S)
        sys.exit()

203 

204 

def check_for_include_column_and_add_if_missing(peakipy_data):
    """Ensure the peak DataFrame has an ``include`` column.

    Older peaklists lack the column; add it with every row set to "yes"
    for compatibility. An existing column is left untouched.

    Returns
    -------
    the same *peakipy_data* object (modified in place)
    """
    if "include" not in peakipy_data.df.columns:
        # scalar assignment replaces the old row-wise apply(..., axis=1):
        # same result, no per-row lambda, and it also works on empty frames
        peakipy_data.df["include"] = "yes"
    return peakipy_data

213 

214 

def remove_excluded_peaks(peakipy_data):
    """Drop peaks whose ``include`` flag is not "yes", printing the excluded
    rows as a table, and return *peakipy_data* with the filtered DataFrame."""
    excluded_mask = peakipy_data.df.include != "yes"
    if excluded_mask.any():
        excluded = peakipy_data.df[excluded_mask][peaklist_columns_for_printing]
        table = df_to_rich_table(
            excluded,
            title="[yellow] Excluded peaks [/yellow]",
            columns=excluded.columns,
            styles=["yellow"] * len(excluded.columns),
        )
        print(table)
        peakipy_data.df = peakipy_data.df[~excluded_mask]
    return peakipy_data

229 

230 

def warn_if_trying_to_fit_large_clusters(max_cluster_size, peakipy_data):
    """Return the effective maximum cluster size, warning when it is large.

    Parameters
    ----------
    max_cluster_size : int or None
        user-supplied limit; when None, the largest observed cluster
        (``df.MEMCNT.max()``) is used and a warning is printed if it
        exceeds 10 peaks
    peakipy_data : object
        must expose ``df`` with a ``MEMCNT`` column

    Returns
    -------
    int
        the cluster-size limit to use
    """
    if max_cluster_size is None:
        # hoisted: MEMCNT.max() was previously computed twice
        largest_cluster = peakipy_data.df.MEMCNT.max()
        max_cluster_size = largest_cluster
        if largest_cluster > 10:
            print(
                f"""[red]
                ##################################################################
                You have some clusters of as many as {max_cluster_size} peaks.
                You may want to consider reducing the size of your clusters as the
                fits will struggle.

                Otherwise you can use the --max-cluster-size flag to exclude large
                clusters
                ##################################################################
                [/red]"""
            )
    # (the original no-op `else: max_cluster_size = max_cluster_size` removed)
    return max_cluster_size