David Hrachovy committed on
Commit
190ecd4
·
1 Parent(s): d972137
Files changed (6) hide show
  1. .gitignore +3 -1
  2. app.py +71 -49
  3. estate.db +1 -1
  4. init_db.py +59 -51
  5. requirements.in +2 -5
  6. requirements.txt +0 -0
.gitignore CHANGED
@@ -46,4 +46,6 @@ lib/
46
  # Scrapy
47
  .scrapy/
48
 
49
- _*.py
 
 
 
46
  # Scrapy
47
  .scrapy/
48
 
49
+ _*.py
50
+ *.db-shm
51
+ *.db-wal
app.py CHANGED
@@ -1,35 +1,80 @@
1
  # Standard library imports
2
- import os.path
 
 
 
3
 
4
  # Third-party imports
5
  import gradio as gr
6
  from dotenv import load_dotenv
7
- from langchain.agents import AgentExecutor, create_openai_tools_agent
8
- from langchain.prompts import ChatPromptTemplate
9
- from langchain.schema import HumanMessage, AIMessage
10
- from langchain_community.utilities import SQLDatabase
11
- from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
12
- from langchain_openai import ChatOpenAI
13
 
14
  # Local imports
15
- from init_db import Project, db_description, json_structure
16
 
17
  # Load environment variables
18
  load_dotenv()
19
 
20
- # Check if database exists
21
- if not os.path.exists('estate.db'):
22
- raise FileNotFoundError(
23
- "Database file 'estate.db' not found. Please run 'uv run init_db.py' first."
24
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # Initialize model and database
27
- model = ChatOpenAI(model="o3-mini-2025-01-31", streaming=True)
28
- db = SQLDatabase.from_uri("sqlite:///estate.db")
29
 
30
- # Set up SQL toolkit and tools
31
- toolkit = SQLDatabaseToolkit(db=db, llm=model)
32
- tools = toolkit.get_tools()
33
 
34
  sql_distance_query = """
35
  SELECT
@@ -49,40 +94,17 @@ ORDER BY distance ASC
49
  LIMIT 5;
50
  """
51
 
52
- # Get the OpenAI tools agent prompt
53
- prompt = ChatPromptTemplate.from_messages([
54
- ("system", f"You are a helpful assistant that presents information about real estate projects in database for investors. Use 'IS NOT NULL' when necessary. Use Czech city names. Do not present SQL or JSON. Use simple language. Do not make up information or hallucinate. If final result rows are empty respond that you could not find records matching criteria. Table structure: {db_description}. This JSON structure info can help you: {json_structure}."),
55
- ("placeholder", "{chat_history}"),
56
- ("human", "{input}"),
57
- ("placeholder", "{agent_scratchpad}"),
58
- ])
59
-
60
- # Create the agent with OpenAI tools format
61
- agent = create_openai_tools_agent(
62
- llm=model,
63
- tools=tools,
64
- prompt=prompt
65
- )
66
 
67
- # Create agent executor
68
- agent_executor = AgentExecutor(
69
- agent=agent,
70
- tools=tools,
71
- verbose=True
72
- )
73
 
74
  def chat_with_sql(message, history):
75
  try:
76
- history_langchain_format = []
77
- for msg in history:
78
- if msg['role'] == "user":
79
- history_langchain_format.append(HumanMessage(content=msg['content']))
80
- elif msg['role'] == "assistant":
81
- history_langchain_format.append(AIMessage(content=msg['content']))
82
- history_langchain_format.append(HumanMessage(content=message))
83
- response = agent_executor.invoke({"input": message, "history": history_langchain_format})
84
- for i in range(len(response["output"])):
85
- yield response["output"][:i+1]
86
  except Exception as e:
87
  yield f"Error: {str(e)}"
88
 
 
1
  # Standard library imports
2
+ import os
3
+ import re
4
+ import sqlite3
5
+ from typing import List, Tuple, Optional
6
 
7
  # Third-party imports
8
  import gradio as gr
9
  from dotenv import load_dotenv
10
+
11
+ # smolagents imports
12
+ from smolagents import CodeAgent, LiteLLMModel, tool
13
+ from smolagents.agent_types import AgentText
 
 
14
 
15
  # Local imports
16
+ from init_db import Project
17
 
18
  # Load environment variables
19
  load_dotenv()
20
 
21
# Tool exposed to the code agent: runs one SQL statement against estate.db and
# returns every resulting row.
#
# NOTE: the docstring below is presumably consumed by smolagents' @tool as the
# tool description and argument schema shown to the model, so it is kept
# verbatim -- treat it as runtime text and confirm before editing it.
#
# NOTE(review): `query` is model-generated and executed as-is on a regular
# (writable) connection; consider a read-only connection if the agent must
# never modify the database.
@tool
def sql_engine(query: str) -> List[Tuple]:
    """
    Allows you to perform SQL queries on the table in SQLite database. Returns list of results.
    The table is named 'project'. Its description is as follows:
    Columns:
    - url: VARCHAR(255) - URL of the project
    - title: VARCHAR(255) - Title of the project
    - deposit: INTEGER - Percentage of payment before completion
    - min_price: INTEGER - Lowest available apartment price in CZK with VAT
    - status: VARCHAR(255) - Status of the project (preparation, selling, sold out)
    - city: VARCHAR(255) - City of the project
    - lat: FLOAT - GPS latitude coordinates
    - lng: FLOAT - GPS longitude coordinates
    - start_year: INTEGER - Year of construction start
    - end_year: INTEGER - Year of construction end
    - developer: VARCHAR(255) - Name of the construction company
    - ignore: BOOLEAN - If True, the project does not have any apartments for sale

    # Apartment prices by type (all INTEGER in CZK with VAT)
    - price_1kk: Price of 1+kk apartment
    - price_2kk: Price of 2+kk apartment
    - price_3kk: Price of 3+kk apartment
    - price_4kk: Price of 4+kk apartment
    - price_5kk: Price of 5+kk apartment
    - price_6kk: Price of 6+kk apartment
    - price_7kk: Price of 7+kk apartment
    - price_8kk: Price of 8+kk apartment
    - price_9kk: Price of 9+kk apartment
    - price_10kk: Price of 10+kk apartment

    - price_1_1: Price of 1+1 apartment
    - price_2_1: Price of 2+1 apartment
    - price_3_1: Price of 3+1 apartment
    - price_4_1: Price of 4+1 apartment
    - price_5_1: Price of 5+1 apartment
    - price_6_1: Price of 6+1 apartment
    - price_7_1: Price of 7+1 apartment
    - price_8_1: Price of 8+1 apartment
    - price_9_1: Price of 9+1 apartment
    - price_10_1: Price of 10+1 apartment

    - content: TEXT - Raw content (ignore to save tokens)
    - created_at: DATETIME - Date and time of creation

    Args:
        query: The query to perform. This should be correct SQL.
    """
    con = sqlite3.connect("estate.db")
    try:
        # Run and fetch inside the try block so the connection is released
        # even when the SQL is invalid and execute()/fetchall() raises;
        # previously close() was skipped on any error.
        return con.execute(query).fetchall()
    finally:
        con.close()
76
 
 
 
 
77
 
 
 
 
78
 
79
  sql_distance_query = """
80
  SELECT
 
94
  LIMIT 5;
95
  """
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ litemodel = LiteLLMModel("o3-mini-2025-01-31", api_base="https://api.openai.com/v1", api_key=os.getenv("OPENAI_API_KEY"))
99
+ codeagent = CodeAgent(tools=[sql_engine], model=litemodel)
 
 
 
 
100
 
101
  def chat_with_sql(message, history):
102
  try:
103
+ input_message = "You are a helpful assistant that presents information about real estate projects in database for investors. Use 'IS NOT NULL' when necessary. Translate any city names from input into Czech language. Present useful fields such as url. Do not make up information or hallucinate. If final result rows are empty respond that you could not find records matching criteria. Use only Czech city names. Note that there is limit 10 records for database result. Reformat final answer in markdown. User Input:\n\n {message}"
104
+ prompt = input_message.format(message=message)
105
+ result = codeagent.run(prompt)
106
+ yield gr.ChatMessage(role="assistant", content=result)
107
+
 
 
 
 
 
108
  except Exception as e:
109
  yield f"Error: {str(e)}"
110
 
estate.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d848c98537602c1b22c1464e08b2662446e9cd32c8d48324b177baca184a253c
3
  size 2560000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78ec9ee34c23b783b6451dc9107cf6e1af1ec35fdee048289bbac32503682d0
3
  size 2560000
init_db.py CHANGED
@@ -1,62 +1,80 @@
1
  from peewee import *
2
  from datetime import datetime
3
  from playhouse.sqlite_ext import *
 
4
 
5
  # Initialize database
6
-
7
- json_structure = """JSON column with hash with keys
8
-
9
- 'title': str, official title of the project
10
- 'deposit': int (in percentage), sum of all payments before the apartment is complete ("koladace" requested)
11
- 'cheapest_available_apartments' : hash with keys:
12
- '1+kk': int, lowest price for 1+kk with VAT in CZK. It should not be booked or sold
13
- '2+kk': int
14
- '3+kk': int
15
- '4+kk': int
16
- '5+kk': int
17
- '7+kk': int
18
- '8+kk': int
19
- '9+kk': int
20
- '10+kk': int
21
- '1+1': int
22
- '2+1': int
23
- '3+1': int
24
- '4+1': int
25
- '5+1': int
26
- '6+1': int
27
- '7+1': int
28
- '8+1': int
29
- '9+1': int
30
- '10+1': int
31
- 'min_price': int, lowest available apartment price in CZK with VAT. It should not be booked or sold.
32
- 'status': str, status of the project (preparation, selling, sold out)
33
- 'city': str, city of the project
34
- 'lat': float, gps coordinates of the project
35
- 'lng': float, gps coordinates of the project
36
- 'start_year': int, year of construction start
37
- 'end_year': int, estimated year of construction end
38
- 'developer': str, name of the contruction company/"developer"
39
- 'ignore': bool, if True, the project does not have any apartments for sale. Some values can be null/unknown"""
40
  db = SqliteExtDatabase('estate.db')
41
 
42
- db_description = f"""Table "project" - list of real estate projects (novostavby) in Czech Republic
43
  url: url of the project.
44
- structure: {json_structure}.
45
- content: unstructured additional information about the project.
46
- created_at: date and time of creation
47
  title: title of the project
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  """
49
 
50
  class Project(Model):
 
51
  url = CharField(unique=True)
52
  title = CharField(null=True)
53
- structure = JSONField(null=True)
54
- content = TextField(null=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  created_at = DateTimeField(default=datetime.now)
56
 
57
  class Meta:
58
  database = db
59
 
 
60
  def init_database():
61
  """Initialize the database and create tables"""
62
  print("Initializing database...")
@@ -64,17 +82,7 @@ def init_database():
64
  db.drop_tables([Project])
65
  db.create_tables([Project])
66
  print("Created tables successfully!")
67
-
68
- # Add some test data if needed
69
- test_urls = [
70
- 'https://brnojedna.cz/'
71
- ]
72
-
73
- for url in test_urls:
74
- Project.get_or_create(url=url)
75
-
76
- print(f"Number of projects in database: {Project.select().count()}")
77
  db.close()
78
 
79
  if __name__ == "__main__":
80
- init_database()
 
1
  from peewee import *
2
  from datetime import datetime
3
  from playhouse.sqlite_ext import *
4
+ import json
5
 
6
  # Initialize database
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  db = SqliteExtDatabase('estate.db')
8
 
9
+ db_description = """Table "project" - list of real estate projects (novostavby) in Czech Republic
10
  url: url of the project.
 
 
 
11
  title: title of the project
12
+ deposit: percentage of payment before completion
13
+ min_price: lowest available apartment price in CZK with VAT
14
+ status: status of the project (preparation, selling, sold out)
15
+ city: city of the project
16
+ lat: GPS latitude coordinates
17
+ lng: GPS longitude coordinates
18
+ start_year: year of construction start
19
+ end_year: estimated year of construction end
20
+ developer: name of the construction company
21
+ ignore: if True, the project does not have any apartments for sale
22
+
23
+ Apartment prices are stored in separate columns for each type:
24
+ price_1kk, price_2kk, price_3kk, etc.
25
  """
26
 
27
  class Project(Model):
28
+ # Basic information
29
  url = CharField(unique=True)
30
  title = CharField(null=True)
31
+
32
+ # Financial information
33
+ deposit = IntegerField(null=True) # percentage
34
+ min_price = IntegerField(null=True) # CZK with VAT
35
+
36
+ # Apartment prices by type
37
+ price_1kk = IntegerField(null=True)
38
+ price_2kk = IntegerField(null=True)
39
+ price_3kk = IntegerField(null=True)
40
+ price_4kk = IntegerField(null=True)
41
+ price_5kk = IntegerField(null=True)
42
+ price_6kk = IntegerField(null=True)
43
+ price_7kk = IntegerField(null=True)
44
+ price_8kk = IntegerField(null=True)
45
+ price_9kk = IntegerField(null=True)
46
+ price_10kk = IntegerField(null=True)
47
+
48
+ price_1_1 = IntegerField(null=True)
49
+ price_2_1 = IntegerField(null=True)
50
+ price_3_1 = IntegerField(null=True)
51
+ price_4_1 = IntegerField(null=True)
52
+ price_5_1 = IntegerField(null=True)
53
+ price_6_1 = IntegerField(null=True)
54
+ price_7_1 = IntegerField(null=True)
55
+ price_8_1 = IntegerField(null=True)
56
+ price_9_1 = IntegerField(null=True)
57
+ price_10_1 = IntegerField(null=True)
58
+
59
+ # Project information
60
+ status = CharField(null=True)
61
+ city = CharField(null=True)
62
+ lat = FloatField(null=True)
63
+ lng = FloatField(null=True)
64
+ start_year = IntegerField(null=True)
65
+ end_year = IntegerField(null=True)
66
+ developer = CharField(null=True)
67
+
68
+ # Additional data
69
+ ignore = BooleanField(null=True)
70
+ content = TextField(null=True) # Keep raw content for reference
71
+ structure = JSONField(null=True) # Keep JSON for backward compatibility
72
  created_at = DateTimeField(default=datetime.now)
73
 
74
  class Meta:
75
  database = db
76
 
77
+
78
  def init_database():
79
  """Initialize the database and create tables"""
80
  print("Initializing database...")
 
82
  db.drop_tables([Project])
83
  db.create_tables([Project])
84
  print("Created tables successfully!")
 
 
 
 
 
 
 
 
 
 
85
  db.close()
86
 
87
  if __name__ == "__main__":
88
+ init_database()
requirements.in CHANGED
@@ -1,12 +1,9 @@
1
  gradio
2
- langchain
3
- langchain-community
4
- langchain-openai
5
  python-dotenv
6
  beautifulsoup4
7
  requests
8
  peewee
9
  openai
10
- crawl4ai
11
  datasketch
12
- geocoder
 
 
1
  gradio
 
 
 
2
  python-dotenv
3
  beautifulsoup4
4
  requests
5
  peewee
6
  openai
 
7
  datasketch
8
+ geocoder
9
+ smolagents[litellm]
requirements.txt CHANGED
The diff for this file is too large to render. See raw diff