David Hrachovy committed on
Commit
d2d3ca7
·
1 Parent(s): e41e611
Files changed (6) hide show
  1. .gitignore +6 -1
  2. README.md +2 -2
  3. app.py +30 -5
  4. init_db.py +51 -0
  5. requirements.in +3 -1
  6. requirements.txt +0 -0
.gitignore CHANGED
@@ -41,4 +41,9 @@ flagged/
41
  # Misc
42
  .DS_Store
43
  .gradio/
44
- lib/
 
 
 
 
 
 
41
  # Misc
42
  .DS_Store
43
  .gradio/
44
+ lib/
45
+
46
+ # Scrapy
47
+ .scrapy/
48
+
49
+ _*.py
README.md CHANGED
@@ -34,7 +34,7 @@ A Gradio interface that scrapes and analyzes real estate projects using LangChai
34
 
35
  3. Install the dependencies:
36
  ```sh
37
- uv pip install -r requirements.txt
38
  ```
39
 
40
  4. Run the application:
@@ -44,5 +44,5 @@ A Gradio interface that scrapes and analyzes real estate projects using LangChai
44
 
45
  5. Upgrade all packages to their latest versions:
46
  ```sh
47
- uv pip install --upgrade -r requirements.txt
48
  ```
 
34
 
35
  3. Install the dependencies:
36
  ```sh
37
+ uv pip sync requirements.txt
38
  ```
39
 
40
  4. Run the application:
 
44
 
45
  5. Upgrade all packages to their latest versions:
46
  ```sh
47
+ uv pip compile - -o requirements.txt
48
  ```
app.py CHANGED
@@ -6,8 +6,9 @@ from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
6
  import gradio as gr
7
  from dotenv import load_dotenv
8
  from langchain.schema import HumanMessage, AIMessage
 
9
  import os.path
10
-
11
  # Load environment variables
12
  load_dotenv()
13
 
@@ -18,15 +19,38 @@ if not os.path.exists('estate.db'):
18
  )
19
 
20
  # Initialize model and database
21
- model = ChatOpenAI(model="gpt-4", streaming=True)
22
  db = SQLDatabase.from_uri("sqlite:///estate.db")
23
 
24
  # Set up SQL toolkit and tools
25
  toolkit = SQLDatabaseToolkit(db=db, llm=model)
26
  tools = toolkit.get_tools()
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  # Get the OpenAI tools agent prompt
29
- prompt = hub.pull("hwchase17/openai-tools-agent")
 
 
 
 
 
30
 
31
  # Create the agent with OpenAI tools format
32
  agent = create_openai_tools_agent(
@@ -61,9 +85,10 @@ def chat_with_sql(message, history):
61
  demo = gr.ChatInterface(
62
  fn=chat_with_sql,
63
  title="Estate Chat",
64
- description="Ask questions about 🇨🇿 real estate projects!",
65
  examples=[
66
- "What is project with the lowest deposit?",
 
67
  ],
68
  type="messages"
69
  )
 
6
  import gradio as gr
7
  from dotenv import load_dotenv
8
  from langchain.schema import HumanMessage, AIMessage
9
+ from langchain.prompts import ChatPromptTemplate
10
  import os.path
11
+ from init_db import db_description
12
  # Load environment variables
13
  load_dotenv()
14
 
 
19
  )
20
 
21
  # Initialize model and database
22
+ model = ChatOpenAI(model="gpt-4o-2024-08-06", streaming=True)
23
  db = SQLDatabase.from_uri("sqlite:///estate.db")
24
 
25
  # Set up SQL toolkit and tools
26
  toolkit = SQLDatabaseToolkit(db=db, llm=model)
27
  tools = toolkit.get_tools()
28
 
29
+ sql_distance_query = """
30
+ SELECT
31
+ id,
32
+ url,
33
+ structure->>'$.lat' AS lat,
34
+ structure->>'$.lng' AS lng,
35
+ (6371000 * acos(
36
+ cos(radians(50.08804)) * cos(radians(CAST(structure->>'$.lat' AS FLOAT))) *
37
+ cos(radians(CAST(structure->>'$.lng' AS FLOAT)) - radians(14.42076)) +
38
+ sin(radians(50.08804)) * sin(radians(CAST(structure->>'$.lat' AS FLOAT)))
39
+ )) AS distance
40
+ FROM project
41
+ WHERE structure->>'$.lat' IS NOT NULL
42
+ AND structure->>'$.lng' IS NOT NULL
43
+ ORDER BY distance ASC
44
+ LIMIT 5;
45
+ """
46
+
47
  # Get the OpenAI tools agent prompt
48
+ prompt = ChatPromptTemplate.from_messages([
49
+ ("system", "You are a helpful assistant. You speak Czech. You can answer questions about real estate projects (novostavby) in Czech Republic. You have access to a database of real estate projects (translate output to Czech too). Some info about the structure of the database: " + db_description + "Note that some json values can be null so you must sometimes check with IS NOT NULL. For calculating you can adapt this query: " + sql_distance_query),
50
+ ("placeholder", "{chat_history}"),
51
+ ("human", "{input}"),
52
+ ("placeholder", "{agent_scratchpad}"),
53
+ ])
54
 
55
  # Create the agent with OpenAI tools format
56
  agent = create_openai_tools_agent(
 
85
  demo = gr.ChatInterface(
86
  fn=chat_with_sql,
87
  title="Estate Chat",
88
+ description="Zeptej se me na cokoli o novostavbách v ČR 🇨🇿",
89
  examples=[
90
+ "Projekt s nejnižším vkladem",
91
+ "Nejlevnejsi byt v Praze",
92
  ],
93
  type="messages"
94
  )
init_db.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from peewee import *
2
+ from datetime import datetime
3
+ from playhouse.sqlite_ext import *
4
+
5
+ # Initialize database
6
+
7
+ json_structure = "JSON column with hash with keys 'title': str, official title of the project, 'deposit': int, initial deposit (in percentage), 'min_price': int, lowest available property price with VAT. It should not be booked or sold., 'city': str, city of the project, 'lat': float, gps coordinates of the project, 'lng': float, gps coordinates of the project, 'start_year': int, year of construction start, 'end_year': int, estimated year of construction end. Some values can be null/unknown"
8
+
9
+ db = SqliteExtDatabase('estate.db', pragmas=(
10
+ ('cache_size', -1024 * 64), # 64MB page-cache.
11
+ ('journal_mode', 'wal'), # Use WAL-mode (you should always use this!).
12
+ ('foreign_keys', 1))) # Enforce foreign-key constraints.
13
+ # Define the Project model
14
+
15
+ db_description = f"""Table "project" - list of real estate projects (novostavby) in Czech Republic
16
+ url: url of the project.
17
+ structure: {json_structure}.
18
+ content: contents of the website.
19
+ created_at: date and time of creation.
20
+ """
21
+
22
+ class Project(Model):
23
+ url = CharField(unique=True)
24
+ structure = JSONField(null=True)
25
+ content = TextField(null=True)
26
+ created_at = DateTimeField(default=datetime.now)
27
+
28
+ class Meta:
29
+ database = db
30
+
31
+ def init_database():
32
+ """Initialize the database and create tables"""
33
+ print("Initializing database...")
34
+ db.connect()
35
+ db.drop_tables([Project])
36
+ db.create_tables([Project])
37
+ print("Created tables successfully!")
38
+
39
+ # Add some test data if needed
40
+ test_urls = [
41
+ 'https://brnojedna.cz/'
42
+ ]
43
+
44
+ for url in test_urls:
45
+ Project.get_or_create(url=url)
46
+
47
+ print(f"Number of projects in database: {Project.select().count()}")
48
+ db.close()
49
+
50
+ if __name__ == "__main__":
51
+ init_database()
requirements.in CHANGED
@@ -6,5 +6,7 @@ python-dotenv
6
  beautifulsoup4
7
  requests
8
  peewee
9
- browser_use
10
  openai
 
 
 
 
6
  beautifulsoup4
7
  requests
8
  peewee
 
9
  openai
10
+ crawl4ai
11
+ datasketch
12
+ geocoder
requirements.txt CHANGED
The diff for this file is too large to render. See raw diff