Smith42 committed on
Commit
55dabfb
·
1 Parent(s): d21f6ff
Files changed (2) hide show
  1. app.py +165 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import plotly.subplots as sp
6
+ from datasets import load_dataset
7
+ import umap
8
+
9
+ # Load dataset function
10
def load_dataset_from_hub(dataset_name, split="test"):
    """Load one split of a dataset, reporting failure as a message.

    Args:
        dataset_name: Name passed straight through to ``load_dataset``.
        split: Split to load; defaults to ``"test"``.

    Returns:
        A ``(dataset, error)`` pair. On success ``error`` is ``None``; on any
        exception ``dataset`` is ``None`` and ``error`` is the exception text.
    """
    try:
        loaded = load_dataset(dataset_name, split=split)
    except Exception as exc:
        # Callers expect a message rather than a raised exception.
        return None, str(exc)
    return loaded, None
15
+
16
+ # Create visualization function
17
def create_visualization(split, color_col, log):
    """Build a three-panel scatter figure of galaxy embeddings.

    Each panel plots the first two components of one embedding column from
    the ``Smith42/galaxies_with_embeddings`` dataset, with points coloured by
    the chosen property.

    Args:
        split: Dataset split to load.
        color_col: Name of the dataset column used to colour the points.
        log: If truthy, colour by the natural log of the property.

    Returns:
        A ``(figure, error)`` pair. On success ``error`` is ``None``; on
        failure ``figure`` is ``None`` and ``error`` is a readable message.
    """
    # The property dropdown may not have a selection yet; fail fast with a
    # clear message instead of loading the dataset and hitting a lookup error.
    if not color_col:
        return None, "Error creating viz: no property selected"

    # Load the dataset
    dataset, error = load_dataset_from_hub("Smith42/galaxies_with_embeddings", split)
    if error:
        return None, f"Error loading dataset: {error}"

    try:
        embedding_cols = ["p16k00_pca", "p16k01_pca", "p16k10_pca"]
        subplot_titles = ["k = 0%", "k = 1%", "k = 10%"]

        # Extract embeddings and color values
        embeddings = dataset.select_columns(embedding_cols)
        colors = np.array(dataset[color_col], dtype=float)
        if log:
            # log(0) is -inf and log(negative) is NaN. dropna() below only
            # removes NaN, so map every non-finite result to NaN; otherwise
            # -inf would survive and distort the colour scale.
            with np.errstate(divide="ignore", invalid="ignore"):
                colors = np.log(colors)
            colors[~np.isfinite(colors)] = np.nan

        fig = sp.make_subplots(cols=len(embedding_cols), subplot_titles=subplot_titles)

        # Subplot columns are 1-indexed.
        for col, embedding_col in enumerate(embedding_cols, start=1):
            emb_ar = np.array(embeddings[embedding_col])
            df = pd.DataFrame({
                'x': emb_ar[:, 0],
                'y': emb_ar[:, 1],
                'color': colors
            }).dropna()
            scatter = px.scatter(df, x='x', y='y', color='color')
            fig.add_trace(scatter.data[0], row=1, col=col)

        return fig, None
    except Exception as e:
        return None, f"Error creating viz: {str(e)}"
47
+
48
# Maps each property category shown in the UI to the dataset column names
# offered for colouring the scatter plots. Insertion order is significant:
# the first category and the first column of a category are used as defaults.
property_groups = {
    "Basic Identifiers": [
        "dr8_id", "ra", "dec", "brickid", "objid", "file_name", "iauname",
    ],
    "Galaxy Morphology": [
        # smooth-or-featured
        "smooth-or-featured_smooth_fraction",
        "smooth-or-featured_featured-or-disk_fraction",
        "smooth-or-featured_artifact_fraction",
        # disk-edge-on
        "disk-edge-on_yes_fraction",
        "disk-edge-on_no_fraction",
        # has-spiral-arms
        "has-spiral-arms_yes_fraction",
        "has-spiral-arms_no_fraction",
        # bar
        "bar_strong_fraction",
        "bar_weak_fraction",
        "bar_no_fraction",
        # bulge-size
        "bulge-size_dominant_fraction",
        "bulge-size_large_fraction",
        "bulge-size_moderate_fraction",
        "bulge-size_small_fraction",
        "bulge-size_none_fraction",
        # how-rounded
        "how-rounded_round_fraction",
        "how-rounded_in-between_fraction",
        "how-rounded_cigar-shaped_fraction",
        # edge-on-bulge
        "edge-on-bulge_boxy_fraction",
        "edge-on-bulge_none_fraction",
        "edge-on-bulge_rounded_fraction",
        # spiral-winding
        "spiral-winding_tight_fraction",
        "spiral-winding_medium_fraction",
        "spiral-winding_loose_fraction",
        # spiral-arm-count
        "spiral-arm-count_1_fraction",
        "spiral-arm-count_2_fraction",
        "spiral-arm-count_3_fraction",
        "spiral-arm-count_4_fraction",
        "spiral-arm-count_more-than-4_fraction",
        "spiral-arm-count_cant-tell_fraction",
        # merging
        "merging_none_fraction",
        "merging_minor-disturbance_fraction",
        "merging_major-disturbance_fraction",
        "merging_merger_fraction",
    ],
    "Physical Size Parameters": [
        "est_petro_th50", "est_petro_th50_kpc",
        "petro_theta", "petro_th50", "petro_th90",
        "petro_phi50", "petro_phi90", "petro_ba50", "petro_ba90",
        "elpetro_ba", "elpetro_phi", "elpetro_flux_r", "elpetro_theta_r",
    ],
    "Photometric Properties": [
        "mag_r_desi", "mag_g_desi", "mag_z_desi",
        "mag_f", "mag_n", "mag_u", "mag_g", "mag_r", "mag_i", "mag_z",
        "u_minus_r",
        "sersic_n", "sersic_ba", "sersic_phi",
        "elpetro_absmag_f", "elpetro_absmag_n", "elpetro_absmag_u",
        "elpetro_absmag_g", "elpetro_absmag_r", "elpetro_absmag_i",
        "elpetro_absmag_z",
        "sersic_nmgy_f", "sersic_nmgy_n", "sersic_nmgy_u", "sersic_nmgy_g",
        "sersic_nmgy_r", "sersic_nmgy_i", "sersic_nmgy_z",
    ],
    "Mass and Redshift": [
        "elpetro_mass", "elpetro_mass_log",
        "redshift", "redshift_nsa", "redshift_ossy",
        "photo_z", "photo_zerr", "spec_z",
    ],
    "Star Formation Properties": [
        # fibre_sfr
        "fibre_sfr_avg", "fibre_sfr_entropy", "fibre_sfr_median",
        "fibre_sfr_mode", "fibre_sfr_p16", "fibre_sfr_p2p5",
        "fibre_sfr_p84", "fibre_sfr_p97p5",
        # fibre_ssfr
        "fibre_ssfr_avg", "fibre_ssfr_entropy", "fibre_ssfr_median",
        "fibre_ssfr_mode", "fibre_ssfr_p16", "fibre_ssfr_p2p5",
        "fibre_ssfr_p84", "fibre_ssfr_p97p5",
        # total_ssfr
        "total_ssfr_avg", "total_ssfr_entropy", "total_ssfr_flag",
        "total_ssfr_median", "total_ssfr_mode", "total_ssfr_p16",
        "total_ssfr_p2p5", "total_ssfr_p84", "total_ssfr_p97p5",
        # total_sfr
        "total_sfr_avg", "total_sfr_entropy", "total_sfr_flag",
        "total_sfr_median", "total_sfr_mode", "total_sfr_p16",
        "total_sfr_p2p5", "total_sfr_p84", "total_sfr_p97p5",
    ],
    "AGN Properties": [
        "log_l_oiii", "fwhm", "e_fwhm", "equiv_width", "log_l_ha",
        "log_m_bh", "upper_e_log_m_bh", "lower_e_log_m_bh",
        "log_bolometric_l",
    ],
    "HI Properties": [
        "W50", "sigW", "W20", "HIflux", "sigflux", "SNR", "RMS",
        "Dist", "sigDist", "logMH", "siglogMH",
    ],
    "PhotoZ Catalog": [
        "photoz_id", "ra_photoz", "dec_photoz",
        "mag_abs_g_photoz", "mag_abs_r_photoz", "mag_abs_z_photoz",
        "mass_inf_photoz", "mass_med_photoz", "mass_sup_photoz",
        "sfr_inf_photoz", "sfr_sup_photoz",
        "ssfr_inf_photoz", "ssfr_med_photoz", "ssfr_sup_photoz",
        "sky_separation_arcsec_from_photoz",
    ],
}
119
+
120
+ # Define the Gradio interface
121
with gr.Blocks(title="Galaxy embeddings") as demo:
    gr.Markdown("# Sparse galaxy embeddings")

    # Category selected when the page first loads; the property dropdown is
    # seeded from it so the two controls start out consistent.
    default_group = list(property_groups.keys())[0]

    with gr.Row():
        split_input = gr.Dropdown(
            label="Split",
            value="test",
            choices=["test", "validation"]
        )
        group_dropdown = gr.Dropdown(
            label="Property category",
            choices=list(property_groups.keys()),
            value=default_group
        )
        color_col = gr.Dropdown(
            label="Property",
            choices=property_groups[default_group],
            # Pre-select the first property, as update_properties does on a
            # category change, so the button never submits an empty choice.
            value=property_groups[default_group][0]
        )
        log = gr.Checkbox(
            label="Take log?",
            value=False
        )
        visualize_btn = gr.Button("Let's go!")

    # Hidden until there is something to report; see run_visualization.
    error_output = gr.Textbox(label="Errors", visible=False)

    def update_properties(group):
        """Repopulate the property dropdown for the chosen category."""
        return gr.update(choices=property_groups[group], value=property_groups[group][0])

    def run_visualization(split, prop, take_log):
        """Run create_visualization and reveal the error box only on failure.

        Without this the error message was written to a permanently hidden
        textbox, so failures were invisible to the user.
        """
        fig, error = create_visualization(split, prop, take_log)
        return fig, gr.update(value=error or "", visible=bool(error))

    group_dropdown.change(
        fn=update_properties,
        inputs=[group_dropdown],
        outputs=[color_col]
    )

    with gr.Row():
        plot_output = gr.Plot(label="Visualization")

    visualize_btn.click(
        fn=run_visualization,
        inputs=[split_input, color_col, log],
        outputs=[plot_output, error_output]
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio==4.19.*
2
+ numpy==1.26.*
3
+ pandas==2.1.*
4
+ plotly==5.18.*
5
+ datasets==2.17.*
6
+ umap-learn==0.5.*
7
+ scikit-learn==1.4.*