|  | import pandas as pd | 
					
						
						|  | from scipy.io import loadmat, savemat | 
					
						
						|  | import numpy as np | 
					
						
						|  |  | 
					
						
						|  | def split_features(data_name, df, result_file, feature_path, layer_name): | 
					
						
						|  | data = loadmat(result_file) | 
					
						
						|  | all_variables = {} | 
					
						
						|  | for key, value in data.items(): | 
					
						
						|  |  | 
					
						
						|  | if not key.startswith('__') and not key.endswith('__'): | 
					
						
						|  | all_variables[key] = value | 
					
						
						|  |  | 
					
						
						|  | if data_name == 'konvid_1k': | 
					
						
						|  | for i in range(len(all_variables['Test_videos_Median_model'])): | 
					
						
						|  | test_vids = all_variables['Test_videos_Median_model'][i] | 
					
						
						|  | test_vids = test_vids.tolist() | 
					
						
						|  | else: | 
					
						
						|  | test_vids = [] | 
					
						
						|  | for i in range(len(all_variables['Test_videos_Median_model'])): | 
					
						
						|  | vid = all_variables['Test_videos_Median_model'][i].strip() | 
					
						
						|  | test_vids.append(vid) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if data_name == 'youtube_ugc': | 
					
						
						|  | grey_df = pd.read_csv(f'{metadata_path}/greyscale_report/{data_name.upper()}_greyscale_metadata.csv') | 
					
						
						|  | grey_indices = grey_df.iloc[:, 0].tolist() | 
					
						
						|  | df = df.drop(index=grey_indices).reset_index(drop=True) | 
					
						
						|  |  | 
					
						
						|  | all_vids = df.iloc[:, 0].tolist() | 
					
						
						|  | print(all_vids) | 
					
						
						|  | print(test_vids) | 
					
						
						|  | train_vids = list(set(all_vids) - set(test_vids)) | 
					
						
						|  | print(len(test_vids)) | 
					
						
						|  | print(len(train_vids)) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | train_df = df[df.iloc[:, 0].isin(train_vids)] | 
					
						
						|  | test_df = df[df.iloc[:, 0].isin(test_vids)] | 
					
						
						|  | print(len(test_df)) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | sorted_train_df = pd.DataFrame({'vid': train_df.iloc[:, 0],  'framerate': train_df['framerate'], 'MOS': train_df['mos']}) | 
					
						
						|  | sorted_test_df = pd.DataFrame({'vid': test_df.iloc[:, 0], 'framerate': test_df['framerate'], 'MOS': test_df['mos']}) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | data = loadmat(f'{feature_path}{layer_name}/original_features/{network_name}_{data_name}_original_features.mat') | 
					
						
						|  | features = data[f'{data_name}'] | 
					
						
						|  |  | 
					
						
						|  | if data_name == 'youtube_ugc': | 
					
						
						|  | features = np.delete(features, grey_indices, axis=0) | 
					
						
						|  |  | 
					
						
						|  | train_features = features[train_df.index] | 
					
						
						|  | test_features = features[test_df.index] | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | sorted_train_df.to_csv(f'{metadata_path}mos_files/{data_name}_MOS_train.csv', index=False) | 
					
						
						|  | sorted_test_df.to_csv(f'{metadata_path}mos_files/{data_name}_MOS_test.csv', index=False) | 
					
						
						|  | savemat(f'{feature_path}{layer_name}/relaxvqa_{data_name}_original_train_features.mat', {f'{data_name}_train_features': train_features}) | 
					
						
						|  | savemat(f'{feature_path}{layer_name}/relaxvqa_{data_name}_original_test_features.mat', {f'{data_name}_test_features': test_features}) | 
					
						
						|  |  | 
					
						
						|  | return train_features, test_features, test_vids | 
					
						
						|  |  | 
					
						
						|  | if __name__ == '__main__': | 
					
						
						|  | metadata_path = '../../metadata/' | 
					
						
						|  | feature_path = '../../features_merged_frag/' | 
					
						
						|  | result_path = f'../../log/result/' | 
					
						
						|  |  | 
					
						
						|  | data_name = 'cvd_2014' | 
					
						
						|  | network_name = 'relaxvqa' | 
					
						
						|  | layer_name = 'pool' | 
					
						
						|  | model_name = 'Mlp' | 
					
						
						|  | select_criteria = 'byrmse' | 
					
						
						|  |  | 
					
						
						|  | df = pd.read_csv(f'{metadata_path}/{data_name.upper()}_metadata.csv') | 
					
						
						|  | result_file = f'{result_path}{data_name}_{network_name}_{select_criteria}.mat' | 
					
						
						|  | train_features, test_features, test_vids = split_features(data_name, df, result_file, feature_path, layer_name) | 
					
						
						|  |  |