Spaces:
Running
Running
David Hrachovy
committed on
Commit
·
190ecd4
1
Parent(s):
d972137
Update
Browse files- .gitignore +3 -1
- app.py +71 -49
- estate.db +1 -1
- init_db.py +59 -51
- requirements.in +2 -5
- requirements.txt +0 -0
.gitignore
CHANGED
@@ -46,4 +46,6 @@ lib/
|
|
46 |
# Scrapy
|
47 |
.scrapy/
|
48 |
|
49 |
-
_*.py
|
|
|
|
|
|
46 |
# Scrapy
|
47 |
.scrapy/
|
48 |
|
49 |
+
_*.py
|
50 |
+
*.db-shm
|
51 |
+
*.db-wal
|
app.py
CHANGED
@@ -1,35 +1,80 @@
|
|
1 |
# Standard library imports
|
2 |
-
import os
|
|
|
|
|
|
|
3 |
|
4 |
# Third-party imports
|
5 |
import gradio as gr
|
6 |
from dotenv import load_dotenv
|
7 |
-
|
8 |
-
|
9 |
-
from
|
10 |
-
from
|
11 |
-
from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
12 |
-
from langchain_openai import ChatOpenAI
|
13 |
|
14 |
# Local imports
|
15 |
-
from init_db import Project
|
16 |
|
17 |
# Load environment variables
|
18 |
load_dotenv()
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
-
# Initialize model and database
|
27 |
-
model = ChatOpenAI(model="o3-mini-2025-01-31", streaming=True)
|
28 |
-
db = SQLDatabase.from_uri("sqlite:///estate.db")
|
29 |
|
30 |
-
# Set up SQL toolkit and tools
|
31 |
-
toolkit = SQLDatabaseToolkit(db=db, llm=model)
|
32 |
-
tools = toolkit.get_tools()
|
33 |
|
34 |
sql_distance_query = """
|
35 |
SELECT
|
@@ -49,40 +94,17 @@ ORDER BY distance ASC
|
|
49 |
LIMIT 5;
|
50 |
"""
|
51 |
|
52 |
-
# Get the OpenAI tools agent prompt
|
53 |
-
prompt = ChatPromptTemplate.from_messages([
|
54 |
-
("system", f"You are a helpful assistant that presents information about real estate projects in database for investors. Use 'IS NOT NULL' when necessary. Use Czech city names. Do not present SQL or JSON. Use simple language. Do not make up information or hallucinate. If final result rows are empty respond that you could not find records matching criteria. Table structure: {db_description}. This JSON structure info can help you: {json_structure}."),
|
55 |
-
("placeholder", "{chat_history}"),
|
56 |
-
("human", "{input}"),
|
57 |
-
("placeholder", "{agent_scratchpad}"),
|
58 |
-
])
|
59 |
-
|
60 |
-
# Create the agent with OpenAI tools format
|
61 |
-
agent = create_openai_tools_agent(
|
62 |
-
llm=model,
|
63 |
-
tools=tools,
|
64 |
-
prompt=prompt
|
65 |
-
)
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
agent=agent,
|
70 |
-
tools=tools,
|
71 |
-
verbose=True
|
72 |
-
)
|
73 |
|
74 |
def chat_with_sql(message, history):
|
75 |
try:
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
history_langchain_format.append(AIMessage(content=msg['content']))
|
82 |
-
history_langchain_format.append(HumanMessage(content=message))
|
83 |
-
response = agent_executor.invoke({"input": message, "history": history_langchain_format})
|
84 |
-
for i in range(len(response["output"])):
|
85 |
-
yield response["output"][:i+1]
|
86 |
except Exception as e:
|
87 |
yield f"Error: {str(e)}"
|
88 |
|
|
|
1 |
# Standard library imports
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import sqlite3
|
5 |
+
from typing import List, Tuple, Optional
|
6 |
|
7 |
# Third-party imports
|
8 |
import gradio as gr
|
9 |
from dotenv import load_dotenv
|
10 |
+
|
11 |
+
# smolagents imports
|
12 |
+
from smolagents import CodeAgent, LiteLLMModel, tool
|
13 |
+
from smolagents.agent_types import AgentText
|
|
|
|
|
14 |
|
15 |
# Local imports
|
16 |
+
from init_db import Project
|
17 |
|
18 |
# Load environment variables
|
19 |
load_dotenv()
|
20 |
|
21 |
+
@tool
def sql_engine(query: str) -> List[Tuple]:
    """
    Allows you to perform SQL queries on the table in SQLite database. Returns list of results.
    The table is named 'project'. Its description is as follows:
    Columns:
    - url: VARCHAR(255) - URL of the project
    - title: VARCHAR(255) - Title of the project
    - deposit: INTEGER - Percentage of payment before completion
    - min_price: INTEGER - Lowest available apartment price in CZK with VAT
    - status: VARCHAR(255) - Status of the project (preparation, selling, sold out)
    - city: VARCHAR(255) - City of the project
    - lat: FLOAT - GPS latitude coordinates
    - lng: FLOAT - GPS longitude coordinates
    - start_year: INTEGER - Year of construction start
    - end_year: INTEGER - Year of construction end
    - developer: VARCHAR(255) - Name of the construction company
    - ignore: BOOLEAN - If True, the project does not have any apartments for sale

    # Apartment prices by type (all INTEGER in CZK with VAT)
    - price_1kk: Price of 1+kk apartment
    - price_2kk: Price of 2+kk apartment
    - price_3kk: Price of 3+kk apartment
    - price_4kk: Price of 4+kk apartment
    - price_5kk: Price of 5+kk apartment
    - price_6kk: Price of 6+kk apartment
    - price_7kk: Price of 7+kk apartment
    - price_8kk: Price of 8+kk apartment
    - price_9kk: Price of 9+kk apartment
    - price_10kk: Price of 10+kk apartment

    - price_1_1: Price of 1+1 apartment
    - price_2_1: Price of 2+1 apartment
    - price_3_1: Price of 3+1 apartment
    - price_4_1: Price of 4+1 apartment
    - price_5_1: Price of 5+1 apartment
    - price_6_1: Price of 6+1 apartment
    - price_7_1: Price of 7+1 apartment
    - price_8_1: Price of 8+1 apartment
    - price_9_1: Price of 9+1 apartment
    - price_10_1: Price of 10+1 apartment

    - content: TEXT - Raw content (ignore to save tokens)
    - created_at: DATETIME - Date and time of creation

    Args:
        query: The query to perform. This should be correct SQL.
    """
    # NOTE(review): the docstring above is presumably what the @tool decorator
    # exposes to the agent as the tool description, so its wording is left
    # unchanged -- confirm against the smolagents documentation before editing.
    #
    # SECURITY: `query` is executed verbatim; nothing in this function
    # restricts it to read-only statements.
    con = sqlite3.connect("estate.db")
    try:
        # Materialise all rows before the connection is closed.
        return con.execute(query).fetchall()
    finally:
        # Always release the connection, including when the query is invalid
        # SQL and execute()/fetchall() raises.
        con.close()
|
76 |
|
|
|
|
|
|
|
77 |
|
|
|
|
|
|
|
78 |
|
79 |
sql_distance_query = """
|
80 |
SELECT
|
|
|
94 |
LIMIT 5;
|
95 |
"""
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
+
# Model wrapper for the agent: targets the OpenAI API endpoint and reads the
# API key from the OPENAI_API_KEY environment variable.
litemodel = LiteLLMModel("o3-mini-2025-01-31", api_base="https://api.openai.com/v1", api_key=os.getenv("OPENAI_API_KEY"))
# Agent that is given `sql_engine` as its only tool.
codeagent = CodeAgent(tools=[sql_engine], model=litemodel)
|
|
|
|
|
|
|
|
|
100 |
|
101 |
def chat_with_sql(message, history):
    """Answer one chat message by running the SQL-capable agent.

    Generator used as a chat callback: it yields exactly one item, either the
    assistant reply or an error string.

    Args:
        message: The user's latest message; it is appended to the fixed
            instruction prompt below.
        history: Previous chat turns. Accepted for the callback signature but
            not used -- every call is answered without prior context.

    Yields:
        A ``gr.ChatMessage`` with the agent's answer, or ``"Error: ..."`` if
        anything raised while building the prompt or running the agent.
    """
    try:
        input_message = "You are a helpful assistant that presents information about real estate projects in database for investors. Use 'IS NOT NULL' when necessary. Translate any city names from input into Czech language. Present useful fields such as url. Do not make up information or hallucinate. If final result rows are empty respond that you could not find records matching criteria. Use only Czech city names. Note that there is limit 10 records for database result. Reformat final answer in markdown. User Input:\n\n {message}"
        prompt = input_message.format(message=message)
        result = codeagent.run(prompt)
        # The agent's final answer is not guaranteed to be a string (it can be
        # e.g. a list of rows or a number); coerce it so the chat message
        # always carries text.
        yield gr.ChatMessage(role="assistant", content=str(result))

    except Exception as e:
        # Top-level boundary of the chat callback: report the failure to the
        # user instead of letting the UI request crash.
        yield f"Error: {str(e)}"
|
110 |
|
estate.db
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2560000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e78ec9ee34c23b783b6451dc9107cf6e1af1ec35fdee048289bbac32503682d0
|
3 |
size 2560000
|
init_db.py
CHANGED
@@ -1,62 +1,80 @@
|
|
1 |
from peewee import *
|
2 |
from datetime import datetime
|
3 |
from playhouse.sqlite_ext import *
|
|
|
4 |
|
5 |
# Initialize database
|
6 |
-
|
7 |
-
json_structure = """JSON column with hash with keys
|
8 |
-
|
9 |
-
'title': str, official title of the project
|
10 |
-
'deposit': int (in percentage), sum of all payments before the apartment is complete ("koladace" requested)
|
11 |
-
'cheapest_available_apartments' : hash with keys:
|
12 |
-
'1+kk': int, lowest price for 1+kk with VAT in CZK. It should not be booked or sold
|
13 |
-
'2+kk': int
|
14 |
-
'3+kk': int
|
15 |
-
'4+kk': int
|
16 |
-
'5+kk': int
|
17 |
-
'7+kk': int
|
18 |
-
'8+kk': int
|
19 |
-
'9+kk': int
|
20 |
-
'10+kk': int
|
21 |
-
'1+1': int
|
22 |
-
'2+1': int
|
23 |
-
'3+1': int
|
24 |
-
'4+1': int
|
25 |
-
'5+1': int
|
26 |
-
'6+1': int
|
27 |
-
'7+1': int
|
28 |
-
'8+1': int
|
29 |
-
'9+1': int
|
30 |
-
'10+1': int
|
31 |
-
'min_price': int, lowest available apartment price in CZK with VAT. It should not be booked or sold.
|
32 |
-
'status': str, status of the project (preparation, selling, sold out)
|
33 |
-
'city': str, city of the project
|
34 |
-
'lat': float, gps coordinates of the project
|
35 |
-
'lng': float, gps coordinates of the project
|
36 |
-
'start_year': int, year of construction start
|
37 |
-
'end_year': int, estimated year of construction end
|
38 |
-
'developer': str, name of the contruction company/"developer"
|
39 |
-
'ignore': bool, if True, the project does not have any apartments for sale. Some values can be null/unknown"""
|
40 |
db = SqliteExtDatabase('estate.db')
|
41 |
|
42 |
-
db_description =
|
43 |
url: url of the project.
|
44 |
-
structure: {json_structure}.
|
45 |
-
content: unstructured additional information about the project.
|
46 |
-
created_at: date and time of creation
|
47 |
title: title of the project
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
"""
|
49 |
|
50 |
class Project(Model):
|
|
|
51 |
url = CharField(unique=True)
|
52 |
title = CharField(null=True)
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
created_at = DateTimeField(default=datetime.now)
|
56 |
|
57 |
class Meta:
|
58 |
database = db
|
59 |
|
|
|
60 |
def init_database():
|
61 |
"""Initialize the database and create tables"""
|
62 |
print("Initializing database...")
|
@@ -64,17 +82,7 @@ def init_database():
|
|
64 |
db.drop_tables([Project])
|
65 |
db.create_tables([Project])
|
66 |
print("Created tables successfully!")
|
67 |
-
|
68 |
-
# Add some test data if needed
|
69 |
-
test_urls = [
|
70 |
-
'https://brnojedna.cz/'
|
71 |
-
]
|
72 |
-
|
73 |
-
for url in test_urls:
|
74 |
-
Project.get_or_create(url=url)
|
75 |
-
|
76 |
-
print(f"Number of projects in database: {Project.select().count()}")
|
77 |
db.close()
|
78 |
|
79 |
if __name__ == "__main__":
|
80 |
-
init_database()
|
|
|
1 |
from peewee import *
|
2 |
from datetime import datetime
|
3 |
from playhouse.sqlite_ext import *
|
4 |
+
import json
|
5 |
|
6 |
# Initialize database
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
db = SqliteExtDatabase('estate.db')
|
8 |
|
9 |
+
db_description = """Table "project" - list of real estate projects (novostavby) in Czech Republic
|
10 |
url: url of the project.
|
|
|
|
|
|
|
11 |
title: title of the project
|
12 |
+
deposit: percentage of payment before completion
|
13 |
+
min_price: lowest available apartment price in CZK with VAT
|
14 |
+
status: status of the project (preparation, selling, sold out)
|
15 |
+
city: city of the project
|
16 |
+
lat: GPS latitude coordinates
|
17 |
+
lng: GPS longitude coordinates
|
18 |
+
start_year: year of construction start
|
19 |
+
end_year: estimated year of construction end
|
20 |
+
developer: name of the construction company
|
21 |
+
ignore: if True, the project does not have any apartments for sale
|
22 |
+
|
23 |
+
Apartment prices are stored in separate columns for each type:
|
24 |
+
price_1kk, price_2kk, price_3kk, etc.
|
25 |
"""
|
26 |
|
27 |
class Project(Model):
    """Peewee model for one real estate project.

    Scalar attributes are stored as individual nullable columns; only ``url``
    is required and must be unique. The ``structure`` JSON column is kept
    alongside them for backward compatibility.
    """

    # Basic information
    url = CharField(unique=True)  # unique per project
    title = CharField(null=True)

    # Financial information
    deposit = IntegerField(null=True)  # percentage
    min_price = IntegerField(null=True)  # CZK with VAT

    # Apartment prices by type: "N+kk" layouts
    price_1kk = IntegerField(null=True)
    price_2kk = IntegerField(null=True)
    price_3kk = IntegerField(null=True)
    price_4kk = IntegerField(null=True)
    price_5kk = IntegerField(null=True)
    price_6kk = IntegerField(null=True)
    price_7kk = IntegerField(null=True)
    price_8kk = IntegerField(null=True)
    price_9kk = IntegerField(null=True)
    price_10kk = IntegerField(null=True)

    # Apartment prices by type: "N+1" layouts
    price_1_1 = IntegerField(null=True)
    price_2_1 = IntegerField(null=True)
    price_3_1 = IntegerField(null=True)
    price_4_1 = IntegerField(null=True)
    price_5_1 = IntegerField(null=True)
    price_6_1 = IntegerField(null=True)
    price_7_1 = IntegerField(null=True)
    price_8_1 = IntegerField(null=True)
    price_9_1 = IntegerField(null=True)
    price_10_1 = IntegerField(null=True)

    # Project information
    status = CharField(null=True)
    city = CharField(null=True)
    lat = FloatField(null=True)
    lng = FloatField(null=True)
    start_year = IntegerField(null=True)
    end_year = IntegerField(null=True)
    developer = CharField(null=True)

    # Additional data
    ignore = BooleanField(null=True)
    content = TextField(null=True)  # Keep raw content for reference
    structure = JSONField(null=True)  # Keep JSON for backward compatibility
    created_at = DateTimeField(default=datetime.now)  # callable default: evaluated when a row is created

    class Meta:
        # Bind the model to the module-level database handle.
        database = db
|
76 |
|
77 |
+
|
78 |
def init_database():
|
79 |
"""Initialize the database and create tables"""
|
80 |
print("Initializing database...")
|
|
|
82 |
db.drop_tables([Project])
|
83 |
db.create_tables([Project])
|
84 |
print("Created tables successfully!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
db.close()
|
86 |
|
87 |
if __name__ == "__main__":
|
88 |
+
init_database()
|
requirements.in
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
gradio
|
2 |
-
langchain
|
3 |
-
langchain-community
|
4 |
-
langchain-openai
|
5 |
python-dotenv
|
6 |
beautifulsoup4
|
7 |
requests
|
8 |
peewee
|
9 |
openai
|
10 |
-
crawl4ai
|
11 |
datasketch
|
12 |
-
geocoder
|
|
|
|
1 |
gradio
|
|
|
|
|
|
|
2 |
python-dotenv
|
3 |
beautifulsoup4
|
4 |
requests
|
5 |
peewee
|
6 |
openai
|
|
|
7 |
datasketch
|
8 |
+
geocoder
|
9 |
+
smolagents[litellm]
|
requirements.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|