Rhodham96 committed on
Commit
6267f13
·
1 Parent(s): 37e6a38

changed format of columns

Browse files
Files changed (3) hide show
  1. .gitignore +0 -1
  2. app_old.py +191 -0
  3. preprocessing/pipeline_components.py +2 -0
.gitignore CHANGED
@@ -8,4 +8,3 @@ __pycache__/
8
  env/
9
  venv/
10
  .machine_learning/
11
-
 
8
  env/
9
  venv/
10
  .machine_learning/
 
app_old.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import joblib
4
+
5
def create_dataframe_from_user_input():
    """
    Render the house-information form and, on "Predict", show a price estimate.

    The form is laid out with Streamlit in five two-column sections
    (property details, rooms, surfaces, outdoor features, additional
    features). When the "Predict" button is clicked, the answers are packed
    into a one-row DataFrame, passed through the saved preprocessing
    pipeline, aligned to the column list saved next to the model, and fed to
    the saved model; the first prediction is displayed as the price.

    Returns:
        pandas.DataFrame | None: the one-row frame of raw (untransformed)
        inputs when "Predict" was clicked during this run, otherwise None.
    """
    # Local import: only needed for the upper bound of the construction year.
    from datetime import date

    # Define the lists of possible values for dropdown selections
    type_list = ['HOUSE', 'APARTMENT']
    subtype_list = ['HOUSE', 'APARTMENT', 'VILLA', 'APARTMENT_BLOCK', 'APARTMENT_GROUP',
                    'MIXED_USE_BUILDING', 'GROUND_FLOOR', 'DUPLEX', 'HOUSE_GROUP',
                    'FLAT_STUDIO', 'PENTHOUSE', 'EXCEPTIONAL_PROPERTY', 'MANSION',
                    'TOWN_HOUSE', 'SERVICE_FLAT', 'BUNGALOW', 'KOT', 'COUNTRY_COTTAGE',
                    'FARMHOUSE', 'LOFT', 'CHALET', 'TRIPLEX', 'CASTLE', 'OTHER_PROPERTY',
                    'MANOR_HOUSE', 'PAVILION']
    province_list = ['West Flanders', 'Antwerp', 'East Flanders', 'Brussels', 'Hainaut',
                     'Liège', 'Flemish Brabant', 'Limburg', 'Walloon Brabant', 'Namur',
                     'Luxembourg']
    building_condition_list = ['GOOD', 'AS_NEW', 'TO_RENOVATE', 'TO_BE_DONE_UP',
                               'JUST_RENOVATED', 'TO_RESTORE']
    flood_zone_type_list = ['NON_FLOOD_ZONE', 'POSSIBLE_FLOOD_ZONE', 'RECOGNIZED_FLOOD_ZONE',
                            'RECOGNIZED_N_CIRCUMSCRIBED_FLOOD_ZONE', 'CIRCUMSCRIBED_WATERSIDE_ZONE',
                            'CIRCUMSCRIBED_FLOOD_ZONE', 'POSSIBLE_N_CIRCUMSCRIBED_FLOOD_ZONE',
                            'POSSIBLE_N_CIRCUMSCRIBED_WATERSIDE_ZONE', 'RECOGNIZED_N_CIRCUMSCRIBED_WATERSIDE_ZONE']
    heating_type_list = ['GAS', 'FUELOIL', 'ELECTRIC', 'PELLET', 'WOOD', 'SOLAR', 'CARBON']
    kitchen_type_list = ['INSTALLED', 'HYPER_EQUIPPED', 'SEMI_EQUIPPED', 'NOT_INSTALLED',
                         'USA_HYPER_EQUIPPED', 'USA_INSTALLED', 'USA_SEMI_EQUIPPED',
                         'USA_UNINSTALLED']
    garden_orientation_list = ['SOUTH', 'SOUTH_WEST', 'SOUTH_EAST', 'WEST', 'EAST',
                               'NORTH_WEST', 'NORTH_EAST', 'NORTH']
    terrace_orientation_list = ['SOUTH', 'SOUTH_WEST', 'SOUTH_EAST', 'WEST', 'EAST',
                                'NORTH_WEST', 'NORTH_EAST', 'NORTH']
    epc_score_list = ['B', 'C', 'D', 'A', 'F', 'E', 'G', 'A+', 'A++']
    yes_no_list = ['Yes', 'No']

    # Upper bound for the construction year: follows the calendar instead of
    # a fixed year, and never drops below the previously accepted 2024.
    latest_construction_year = max(2024, date.today().year)

    def yes_no(label, key):
        # Yes/No dropdown; the answer is turned into a bool when the row is built.
        return st.selectbox(label, yes_no_list, key=key)

    def count_input(label, key):
        # Non-negative integer field with a step of 1.
        return st.number_input(label, min_value=0, step=1, key=key)

    def float_input(label, key):
        # Non-negative float field (surfaces, widths, costs).
        return st.number_input(label, min_value=0.0, key=key)

    # Create Streamlit input fields
    st.header("Enter House Information")

    # --- Property Details ---
    st.subheader("Property Details")
    col1, col2 = st.columns(2)
    with col1:
        property_type = st.selectbox("Property Type", type_list, key='type')
        property_subtype = st.selectbox("Subtype", subtype_list, key='subtype')
        province = st.selectbox("Province", province_list, key='province')
        locality = st.text_input("Locality", key='locality')
        post_code = st.number_input("Post Code", min_value=1000, max_value=9999, step=1, key='postCode')
    with col2:
        building_condition = st.selectbox("Building Condition", building_condition_list, key='buildingCondition')
        building_construction_year = st.number_input(
            "Building Construction Year",
            min_value=1000,
            max_value=latest_construction_year,
            step=1,
            key='buildingConstructionYear',
        )
        facade_count = count_input("Facade Count", 'facadeCount')
        floor_count = count_input("Floor Count", 'floorCount')
        flood_zone_type = st.selectbox("Flood Zone Type", flood_zone_type_list, key='floodZoneType')
        epc_score = st.selectbox("EPC Score", epc_score_list, key='epcScore')

    # --- Room Information ---
    st.subheader("Room Information")
    col3, col4 = st.columns(2)
    with col3:
        bedroom_count = count_input("Bedroom Count", 'bedroomCount')
        bathroom_count = count_input("Bathroom Count", 'bathroomCount')
        room_count = count_input("Room Count", 'roomCount')
        has_attic = yes_no("Has Attic", 'hasAttic')
        has_basement = yes_no("Has Basement", 'hasBasement')
        has_dressing_room = yes_no("Has Dressing Room", 'hasDressingRoom')
        has_dining_room = yes_no("Has Dining Room", 'hasDiningRoom')
        dining_room_surface = float_input("Dining Room Surface (sqm)", 'diningRoomSurface')
    with col4:
        has_living_room = yes_no("Has Living Room", 'hasLivingRoom')
        living_room_surface = float_input("Living Room Surface (sqm)", 'livingRoomSurface')
        kitchen_surface = float_input("Kitchen Surface (sqm)", 'kitchenSurface')
        kitchen_type = st.selectbox("Kitchen Type", kitchen_type_list, key='kitchenType')
        toilet_count = count_input("Toilet Count", 'toiletCount')
        has_office = yes_no("Has Office", 'hasOffice')
        has_lift = yes_no("Has Lift", 'hasLift')

    # --- Surface Areas ---
    st.subheader("Surface Areas")
    col5, col6 = st.columns(2)
    with col5:
        habitable_surface = float_input("Habitable Surface (sqm)", 'habitableSurface')
        land_surface = float_input("Land Surface (sqm)", 'landSurface')
        garden_surface = float_input("Garden Surface (sqm)", 'gardenSurface')
    with col6:
        terrace_surface = float_input("Terrace Surface (sqm)", 'terraceSurface')
        street_facade_width = float_input("Street Facade Width (m)", 'streetFacadeWidth')
        monthly_cost = float_input("Monthly Cost (€)", 'monthlyCost')

    # --- Outdoor Features ---
    st.subheader("Outdoor Features")
    col7, col8 = st.columns(2)
    with col7:
        has_garden = yes_no("Has Garden", 'hasGarden')
        garden_orientation = st.selectbox("Garden Orientation", garden_orientation_list, key='gardenOrientation')
        has_balcony = yes_no("Has Balcony", 'hasBalcony')
        has_terrace = yes_no("Has Terrace", 'hasTerrace')
        terrace_orientation = st.selectbox("Terrace Orientation", terrace_orientation_list, key='terraceOrientation')
    with col8:
        parking_count_indoor = count_input("Indoor Parking Count", 'parkingCountIndoor')
        parking_count_outdoor = count_input("Outdoor Parking Count", 'parkingCountOutdoor')
        has_swimming_pool = yes_no("Has Swimming Pool", 'hasSwimmingPool')

    # --- Additional Features ---
    st.subheader("Additional Features")
    col9, col10 = st.columns(2)
    with col9:
        heating_type = st.selectbox("Heating Type", heating_type_list, key='heatingType')
        has_heat_pump = yes_no("Has Heat Pump", 'hasHeatPump')
        has_photovoltaic_panels = yes_no("Has Photovoltaic Panels", 'hasPhotovoltaicPanels')
        has_thermic_panels = yes_no("Has Thermic Panels", 'hasThermicPanels')
    with col10:
        has_air_conditioning = yes_no("Has Air Conditioning", 'hasAirConditioning')
        has_armored_door = yes_no("Has Armored Door", 'hasArmoredDoor')
        has_visiophone = yes_no("Has Visiophone", 'hasVisiophone')
        has_fireplace = yes_no("Has Fireplace", 'hasFireplace')
        # This one field offers 'True'/'False' rather than 'Yes'/'No'.
        accessible_disabled_people = st.selectbox("Accessible Disabled People", ['True', 'False'], key='accessibleDisabledPeople')

    # Nothing to build or predict until the user asks for it.
    if not st.button("Predict"):
        return None

    # Create the DataFrame (key order is kept as-is: it defines the column
    # order handed to the pipeline).
    data = {
        'type': property_type,
        'subtype': property_subtype,
        'bedroomCount': bedroom_count,
        'bathroomCount': bathroom_count,
        'province': province,
        'locality': locality,
        'postCode': post_code,
        'habitableSurface': habitable_surface,
        'roomCount': room_count,
        'monthlyCost': monthly_cost,
        'hasAttic': has_attic == 'Yes',
        'hasBasement': has_basement == 'Yes',
        'hasDressingRoom': has_dressing_room == 'Yes',
        'diningRoomSurface': dining_room_surface,
        'hasDiningRoom': has_dining_room == 'Yes',
        'buildingCondition': building_condition,
        'buildingConstructionYear': building_construction_year,
        'facadeCount': facade_count,
        'floorCount': floor_count,
        'streetFacadeWidth': street_facade_width,
        'hasLift': has_lift == 'Yes',
        'floodZoneType': flood_zone_type,
        'heatingType': heating_type,
        'hasHeatPump': has_heat_pump == 'Yes',
        'hasPhotovoltaicPanels': has_photovoltaic_panels == 'Yes',
        'hasThermicPanels': has_thermic_panels == 'Yes',
        'kitchenSurface': kitchen_surface,
        'kitchenType': kitchen_type,
        'landSurface': land_surface,
        'hasLivingRoom': has_living_room == 'Yes',
        'livingRoomSurface': living_room_surface,
        'hasBalcony': has_balcony == 'Yes',
        'hasGarden': has_garden == 'Yes',
        'gardenSurface': garden_surface,
        'gardenOrientation': garden_orientation,
        'parkingCountIndoor': parking_count_indoor,
        'parkingCountOutdoor': parking_count_outdoor,
        'hasAirConditioning': has_air_conditioning == 'Yes',
        'hasArmoredDoor': has_armored_door == 'Yes',
        'hasVisiophone': has_visiophone == 'Yes',
        'hasOffice': has_office == 'Yes',
        'toiletCount': toilet_count,
        'hasSwimmingPool': has_swimming_pool == 'Yes',
        'hasFireplace': has_fireplace == 'Yes',
        'hasTerrace': has_terrace == 'Yes',
        'terraceSurface': terrace_surface,
        'terraceOrientation': terrace_orientation,
        'accessibleDisabledPeople': accessible_disabled_people == 'True',
        'epcScore': epc_score,
    }
    df = pd.DataFrame(data, index=[0])

    # NOTE: joblib.load unpickles arbitrary Python objects; these files must
    # only ever be artifacts produced by this project's own training run.
    pipeline = joblib.load('saved/pipeline.pkl')
    model = joblib.load('saved/model.pkl')
    expected_columns = joblib.load('saved/columns.pkl')

    df_test = pipeline.transform(df)

    # Add every expected column the transformed row lacks, filled with 0,
    # then select the columns in the saved order before predicting.
    for col in expected_columns:
        if col not in df_test.columns:
            df_test[col] = 0
    df_test = df_test[expected_columns]

    preds = model.predict(df_test)
    st.subheader("Price prediction")
    st.markdown(f"<h1 style='text-align: center; color: red;'>{preds[0]:.2f} €</h1>", unsafe_allow_html=True)
    return df
189
+
190
# Script entry point: render the form (and run a prediction on click) only
# when this file is executed as the main script, not when it is imported.
if __name__ == "__main__":
    create_dataframe_from_user_input()
preprocessing/pipeline_components.py CHANGED
@@ -26,8 +26,10 @@ class DataCleaner(BaseEstimator, TransformerMixin):
26
  'hasTerrace': 'int', 'terraceSurface': 'float', 'terraceOrientation': 'str',
27
  'epcScore': 'str', 'facadeCount': 'int'
28
  }
 
29
 
30
  def fit(self, X, y=None):
 
31
  return self
32
 
33
  def transform(self, X):
 
26
  'hasTerrace': 'int', 'terraceSurface': 'float', 'terraceOrientation': 'str',
27
  'epcScore': 'str', 'facadeCount': 'int'
28
  }
29
+ self.kitchenType_mode = None
30
 
31
  def fit(self, X, y=None):
32
+ #self.kitchenType_mode = X['kitchenType'].mode()[0]
33
  return self
34
 
35
  def transform(self, X):