changed format of columns
Browse files- .gitignore +0 -1
- app_old.py +191 -0
- preprocessing/pipeline_components.py +2 -0
.gitignore
CHANGED
|
@@ -8,4 +8,3 @@ __pycache__/
|
|
| 8 |
env/
|
| 9 |
venv/
|
| 10 |
.machine_learning/
|
| 11 |
-
|
|
|
|
| 8 |
env/
|
| 9 |
venv/
|
| 10 |
.machine_learning/
|
|
|
app_old.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import joblib
|
| 4 |
+
|
| 5 |
+
# Choices offered by the dropdown widgets. Kept at module level so the form
# code below stays readable; the order is the order shown to the user.
_TYPE_OPTIONS = ['HOUSE', 'APARTMENT']
_SUBTYPE_OPTIONS = [
    'HOUSE', 'APARTMENT', 'VILLA', 'APARTMENT_BLOCK', 'APARTMENT_GROUP',
    'MIXED_USE_BUILDING', 'GROUND_FLOOR', 'DUPLEX', 'HOUSE_GROUP',
    'FLAT_STUDIO', 'PENTHOUSE', 'EXCEPTIONAL_PROPERTY', 'MANSION',
    'TOWN_HOUSE', 'SERVICE_FLAT', 'BUNGALOW', 'KOT', 'COUNTRY_COTTAGE',
    'FARMHOUSE', 'LOFT', 'CHALET', 'TRIPLEX', 'CASTLE', 'OTHER_PROPERTY',
    'MANOR_HOUSE', 'PAVILION',
]
_PROVINCE_OPTIONS = [
    'West Flanders', 'Antwerp', 'East Flanders', 'Brussels', 'Hainaut',
    'Liège', 'Flemish Brabant', 'Limburg', 'Walloon Brabant', 'Namur',
    'Luxembourg',
]
_BUILDING_CONDITION_OPTIONS = [
    'GOOD', 'AS_NEW', 'TO_RENOVATE', 'TO_BE_DONE_UP',
    'JUST_RENOVATED', 'TO_RESTORE',
]
_FLOOD_ZONE_OPTIONS = [
    'NON_FLOOD_ZONE', 'POSSIBLE_FLOOD_ZONE', 'RECOGNIZED_FLOOD_ZONE',
    'RECOGNIZED_N_CIRCUMSCRIBED_FLOOD_ZONE', 'CIRCUMSCRIBED_WATERSIDE_ZONE',
    'CIRCUMSCRIBED_FLOOD_ZONE', 'POSSIBLE_N_CIRCUMSCRIBED_FLOOD_ZONE',
    'POSSIBLE_N_CIRCUMSCRIBED_WATERSIDE_ZONE',
    'RECOGNIZED_N_CIRCUMSCRIBED_WATERSIDE_ZONE',
]
_HEATING_OPTIONS = ['GAS', 'FUELOIL', 'ELECTRIC', 'PELLET', 'WOOD', 'SOLAR', 'CARBON']
_KITCHEN_OPTIONS = [
    'INSTALLED', 'HYPER_EQUIPPED', 'SEMI_EQUIPPED', 'NOT_INSTALLED',
    'USA_HYPER_EQUIPPED', 'USA_INSTALLED', 'USA_SEMI_EQUIPPED',
    'USA_UNINSTALLED',
]
# Garden and terrace orientation share the same set of compass directions.
_ORIENTATION_OPTIONS = [
    'SOUTH', 'SOUTH_WEST', 'SOUTH_EAST', 'WEST', 'EAST',
    'NORTH_WEST', 'NORTH_EAST', 'NORTH',
]
_EPC_OPTIONS = ['B', 'C', 'D', 'A', 'F', 'E', 'G', 'A+', 'A++']


def _flag_select(label, key, choices=None):
    """Render a two-option dropdown and return True when the first option is chosen."""
    choices = ['Yes', 'No'] if choices is None else choices
    return st.selectbox(label, choices, key=key) == choices[0]


def _load_artifacts():
    """Load the fitted preprocessing pipeline, the model and the training column list."""
    # NOTE: joblib files are pickles and execute code on load; only artifacts
    # produced by this project should ever be placed under saved/.
    pipeline = joblib.load('saved/pipeline.pkl')
    model = joblib.load('saved/model.pkl')
    expected_columns = joblib.load('saved/columns.pkl')
    return pipeline, model, expected_columns


def _align_to_training_columns(frame, expected_columns):
    """Return *frame* restricted to *expected_columns*, zero-filling missing ones."""
    # A single reindex both adds the absent columns (filled with 0) and puts
    # every column in the order listed in expected_columns.
    return frame.reindex(columns=expected_columns, fill_value=0)


def create_dataframe_from_user_input():
    """
    Render the house-feature form and, on request, show a price prediction.

    The input widgets are grouped into categorized sections. When the
    "Predict" button is clicked, the entered values are assembled into a
    one-row DataFrame, run through the saved pipeline and model, and the
    predicted price is displayed.

    Returns:
        The one-row pandas DataFrame built from the form when "Predict" was
        clicked during this run, otherwise None.
    """
    import datetime  # local import: only needed for the construction-year bound

    st.header("Enter House Information")

    # --- Property Details ---
    st.subheader("Property Details")
    details_left, details_right = st.columns(2)
    with details_left:
        property_type = st.selectbox("Property Type", _TYPE_OPTIONS, key='type')
        property_subtype = st.selectbox("Subtype", _SUBTYPE_OPTIONS, key='subtype')
        province = st.selectbox("Province", _PROVINCE_OPTIONS, key='province')
        locality = st.text_input("Locality", key='locality')
        post_code = st.number_input(
            "Post Code", min_value=1000, max_value=9999, step=1, key='postCode'
        )
    with details_right:
        building_condition = st.selectbox(
            "Building Condition", _BUILDING_CONDITION_OPTIONS, key='buildingCondition'
        )
        # Upper bound follows the calendar so recent buildings stay enterable.
        building_construction_year = st.number_input(
            "Building Construction Year",
            min_value=1000,
            max_value=datetime.date.today().year,
            step=1,
            key='buildingConstructionYear',
        )
        facade_count = st.number_input("Facade Count", min_value=0, step=1, key='facadeCount')
        floor_count = st.number_input("Floor Count", min_value=0, step=1, key='floorCount')
        flood_zone_type = st.selectbox(
            "Flood Zone Type", _FLOOD_ZONE_OPTIONS, key='floodZoneType'
        )
        epc_score = st.selectbox("EPC Score", _EPC_OPTIONS, key='epcScore')

    # --- Room Information ---
    st.subheader("Room Information")
    rooms_left, rooms_right = st.columns(2)
    with rooms_left:
        bedroom_count = st.number_input("Bedroom Count", min_value=0, step=1, key='bedroomCount')
        bathroom_count = st.number_input("Bathroom Count", min_value=0, step=1, key='bathroomCount')
        room_count = st.number_input("Room Count", min_value=0, step=1, key='roomCount')
        has_attic = _flag_select("Has Attic", 'hasAttic')
        has_basement = _flag_select("Has Basement", 'hasBasement')
        has_dressing_room = _flag_select("Has Dressing Room", 'hasDressingRoom')
        has_dining_room = _flag_select("Has Dining Room", 'hasDiningRoom')
        dining_room_surface = st.number_input(
            "Dining Room Surface (sqm)", min_value=0.0, key='diningRoomSurface'
        )
    with rooms_right:
        has_living_room = _flag_select("Has Living Room", 'hasLivingRoom')
        living_room_surface = st.number_input(
            "Living Room Surface (sqm)", min_value=0.0, key='livingRoomSurface'
        )
        kitchen_surface = st.number_input(
            "Kitchen Surface (sqm)", min_value=0.0, key='kitchenSurface'
        )
        kitchen_type = st.selectbox("Kitchen Type", _KITCHEN_OPTIONS, key='kitchenType')
        toilet_count = st.number_input("Toilet Count", min_value=0, step=1, key='toiletCount')
        has_office = _flag_select("Has Office", 'hasOffice')
        has_lift = _flag_select("Has Lift", 'hasLift')

    # --- Surface Areas ---
    st.subheader("Surface Areas")
    surfaces_left, surfaces_right = st.columns(2)
    with surfaces_left:
        habitable_surface = st.number_input(
            "Habitable Surface (sqm)", min_value=0.0, key='habitableSurface'
        )
        land_surface = st.number_input("Land Surface (sqm)", min_value=0.0, key='landSurface')
        garden_surface = st.number_input(
            "Garden Surface (sqm)", min_value=0.0, key='gardenSurface'
        )
    with surfaces_right:
        terrace_surface = st.number_input(
            "Terrace Surface (sqm)", min_value=0.0, key='terraceSurface'
        )
        street_facade_width = st.number_input(
            "Street Facade Width (m)", min_value=0.0, key='streetFacadeWidth'
        )
        monthly_cost = st.number_input("Monthly Cost (€)", min_value=0.0, key='monthlyCost')

    # --- Outdoor Features ---
    st.subheader("Outdoor Features")
    outdoor_left, outdoor_right = st.columns(2)
    with outdoor_left:
        has_garden = _flag_select("Has Garden", 'hasGarden')
        garden_orientation = st.selectbox(
            "Garden Orientation", _ORIENTATION_OPTIONS, key='gardenOrientation'
        )
        has_balcony = _flag_select("Has Balcony", 'hasBalcony')
        has_terrace = _flag_select("Has Terrace", 'hasTerrace')
        terrace_orientation = st.selectbox(
            "Terrace Orientation", _ORIENTATION_OPTIONS, key='terraceOrientation'
        )
    with outdoor_right:
        parking_count_indoor = st.number_input(
            "Indoor Parking Count", min_value=0, step=1, key='parkingCountIndoor'
        )
        parking_count_outdoor = st.number_input(
            "Outdoor Parking Count", min_value=0, step=1, key='parkingCountOutdoor'
        )
        has_swimming_pool = _flag_select("Has Swimming Pool", 'hasSwimmingPool')

    # --- Additional Features ---
    st.subheader("Additional Features")
    extras_left, extras_right = st.columns(2)
    with extras_left:
        heating_type = st.selectbox("Heating Type", _HEATING_OPTIONS, key='heatingType')
        has_heat_pump = _flag_select("Has Heat Pump", 'hasHeatPump')
        has_photovoltaic_panels = _flag_select(
            "Has Photovoltaic Panels", 'hasPhotovoltaicPanels'
        )
        has_thermic_panels = _flag_select("Has Thermic Panels", 'hasThermicPanels')
    with extras_right:
        has_air_conditioning = _flag_select("Has Air Conditioning", 'hasAirConditioning')
        has_armored_door = _flag_select("Has Armored Door", 'hasArmoredDoor')
        has_visiophone = _flag_select("Has Visiophone", 'hasVisiophone')
        has_fireplace = _flag_select("Has Fireplace", 'hasFireplace')
        # This one dropdown offers 'True'/'False' rather than 'Yes'/'No'.
        accessible_disabled_people = _flag_select(
            "Accessible Disabled People", 'accessibleDisabledPeople', ['True', 'False']
        )

    # Nothing to build or predict until the user asks for it.
    if not st.button("Predict"):
        return None

    # Key order is kept stable because it becomes the DataFrame column order.
    data = {
        'type': property_type,
        'subtype': property_subtype,
        'bedroomCount': bedroom_count,
        'bathroomCount': bathroom_count,
        'province': province,
        'locality': locality,
        'postCode': post_code,
        'habitableSurface': habitable_surface,
        'roomCount': room_count,
        'monthlyCost': monthly_cost,
        'hasAttic': has_attic,
        'hasBasement': has_basement,
        'hasDressingRoom': has_dressing_room,
        'diningRoomSurface': dining_room_surface,
        'hasDiningRoom': has_dining_room,
        'buildingCondition': building_condition,
        'buildingConstructionYear': building_construction_year,
        'facadeCount': facade_count,
        'floorCount': floor_count,
        'streetFacadeWidth': street_facade_width,
        'hasLift': has_lift,
        'floodZoneType': flood_zone_type,
        'heatingType': heating_type,
        'hasHeatPump': has_heat_pump,
        'hasPhotovoltaicPanels': has_photovoltaic_panels,
        'hasThermicPanels': has_thermic_panels,
        'kitchenSurface': kitchen_surface,
        'kitchenType': kitchen_type,
        'landSurface': land_surface,
        'hasLivingRoom': has_living_room,
        'livingRoomSurface': living_room_surface,
        'hasBalcony': has_balcony,
        'hasGarden': has_garden,
        'gardenSurface': garden_surface,
        'gardenOrientation': garden_orientation,
        'parkingCountIndoor': parking_count_indoor,
        'parkingCountOutdoor': parking_count_outdoor,
        'hasAirConditioning': has_air_conditioning,
        'hasArmoredDoor': has_armored_door,
        'hasVisiophone': has_visiophone,
        'hasOffice': has_office,
        'toiletCount': toilet_count,
        'hasSwimmingPool': has_swimming_pool,
        'hasFireplace': has_fireplace,
        'hasTerrace': has_terrace,
        'terraceSurface': terrace_surface,
        'terraceOrientation': terrace_orientation,
        'accessibleDisabledPeople': accessible_disabled_people,
        'epcScore': epc_score,
    }
    df = pd.DataFrame(data, index=[0])

    pipeline, model, expected_columns = _load_artifacts()
    # The transformed frame may lack columns the model was trained with
    # (they are filled with 0) or carry extra ones (they are dropped).
    features = _align_to_training_columns(pipeline.transform(df), expected_columns)
    preds = model.predict(features)

    st.subheader("Price prediction")
    st.markdown(
        f"<h1 style='text-align: center; color: red;'>{preds[0]:.2f} €</h1>",
        unsafe_allow_html=True,
    )
    return df


if __name__ == "__main__":
    create_dataframe_from_user_input()
|
preprocessing/pipeline_components.py
CHANGED
|
@@ -26,8 +26,10 @@ class DataCleaner(BaseEstimator, TransformerMixin):
|
|
| 26 |
'hasTerrace': 'int', 'terraceSurface': 'float', 'terraceOrientation': 'str',
|
| 27 |
'epcScore': 'str', 'facadeCount': 'int'
|
| 28 |
}
|
|
|
|
| 29 |
|
| 30 |
def fit(self, X, y=None):
|
|
|
|
| 31 |
return self
|
| 32 |
|
| 33 |
def transform(self, X):
|
|
|
|
| 26 |
'hasTerrace': 'int', 'terraceSurface': 'float', 'terraceOrientation': 'str',
|
| 27 |
'epcScore': 'str', 'facadeCount': 'int'
|
| 28 |
}
|
| 29 |
+
self.kitchenType_mode = None
|
| 30 |
|
| 31 |
def fit(self, X, y=None):
|
| 32 |
+
#self.kitchenType_mode = X['kitchenType'].mode()[0]
|
| 33 |
return self
|
| 34 |
|
| 35 |
def transform(self, X):
|