Spaces:
Sleeping
Sleeping
David Hrachovy
committed on
Commit
·
d2d3ca7
1
Parent(s):
e41e611
Update
Browse files- .gitignore +6 -1
- README.md +2 -2
- app.py +30 -5
- init_db.py +51 -0
- requirements.in +3 -1
- requirements.txt +0 -0
.gitignore
CHANGED
@@ -41,4 +41,9 @@ flagged/
|
|
41 |
# Misc
|
42 |
.DS_Store
|
43 |
.gradio/
|
44 |
-
lib/
|
|
|
|
|
|
|
|
|
|
|
|
41 |
# Misc
|
42 |
.DS_Store
|
43 |
.gradio/
|
44 |
+
lib/
|
45 |
+
|
46 |
+
# Scrapy
|
47 |
+
.scrapy/
|
48 |
+
|
49 |
+
_*.py
|
README.md
CHANGED
@@ -34,7 +34,7 @@ A Gradio interface that scrapes and analyzes real estate projects using LangChai
|
|
34 |
|
35 |
3. Install the dependencies:
|
36 |
```sh
|
37 |
-
uv pip
|
38 |
```
|
39 |
|
40 |
4. Run the application:
|
@@ -44,5 +44,5 @@ A Gradio interface that scrapes and analyzes real estate projects using LangChai
|
|
44 |
|
45 |
5. Upgrade all packages to their latest versions:
|
46 |
```sh
|
47 |
-
|
48 |
```
|
|
|
34 |
|
35 |
3. Install the dependencies:
|
36 |
```sh
|
37 |
+
uv pip sync requirements.txt
|
38 |
```
|
39 |
|
40 |
4. Run the application:
|
|
|
44 |
|
45 |
5. Upgrade all packages to their latest versions:
|
46 |
```sh
|
47 |
+
uv pip compile --upgrade requirements.in -o requirements.txt
|
48 |
```
|
app.py
CHANGED
@@ -6,8 +6,9 @@ from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
|
6 |
import gradio as gr
|
7 |
from dotenv import load_dotenv
|
8 |
from langchain.schema import HumanMessage, AIMessage
|
|
|
9 |
import os.path
|
10 |
-
|
11 |
# Load environment variables
|
12 |
load_dotenv()
|
13 |
|
@@ -18,15 +19,38 @@ if not os.path.exists('estate.db'):
|
|
18 |
)
|
19 |
|
20 |
# Initialize model and database
|
21 |
-
model = ChatOpenAI(model="gpt-
|
22 |
db = SQLDatabase.from_uri("sqlite:///estate.db")
|
23 |
|
24 |
# Set up SQL toolkit and tools
|
25 |
toolkit = SQLDatabaseToolkit(db=db, llm=model)
|
26 |
tools = toolkit.get_tools()
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
# Get the OpenAI tools agent prompt
|
29 |
-
prompt =
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Create the agent with OpenAI tools format
|
32 |
agent = create_openai_tools_agent(
|
@@ -61,9 +85,10 @@ def chat_with_sql(message, history):
|
|
61 |
demo = gr.ChatInterface(
|
62 |
fn=chat_with_sql,
|
63 |
title="Estate Chat",
|
64 |
-
description="
|
65 |
examples=[
|
66 |
-
"
|
|
|
67 |
],
|
68 |
type="messages"
|
69 |
)
|
|
|
6 |
import gradio as gr
|
7 |
from dotenv import load_dotenv
|
8 |
from langchain.schema import HumanMessage, AIMessage
|
9 |
+
from langchain.prompts import ChatPromptTemplate
|
10 |
import os.path
|
11 |
+
from init_db import db_description
|
12 |
# Load environment variables
|
13 |
load_dotenv()
|
14 |
|
|
|
19 |
)
|
20 |
|
21 |
# Initialize model and database
|
22 |
+
model = ChatOpenAI(model="gpt-4o-2024-08-06", streaming=True)
|
23 |
db = SQLDatabase.from_uri("sqlite:///estate.db")
|
24 |
|
25 |
# Set up SQL toolkit and tools
|
26 |
toolkit = SQLDatabaseToolkit(db=db, llm=model)
|
27 |
tools = toolkit.get_tools()
|
28 |
|
29 |
+
# Example query given to the LLM as a template: it lists the five projects
# closest to a fixed reference point (lat 50.08804, lng 14.42076) using the
# spherical law of cosines. 6371000 is the Earth's mean radius in metres, so
# `distance` is expressed in metres.
#
# The acos() argument is clamped to [-1, 1]: for a project located at (or
# extremely close to) the reference point, floating-point rounding can push
# the cosine sum marginally above 1, which is outside acos()'s domain and
# would otherwise yield a NULL/failed distance for the nearest project.
sql_distance_query = """
SELECT
    id,
    url,
    structure->>'$.lat' AS lat,
    structure->>'$.lng' AS lng,
    (6371000 * acos(min(1.0, max(-1.0,
        cos(radians(50.08804)) * cos(radians(CAST(structure->>'$.lat' AS FLOAT))) *
        cos(radians(CAST(structure->>'$.lng' AS FLOAT)) - radians(14.42076)) +
        sin(radians(50.08804)) * sin(radians(CAST(structure->>'$.lat' AS FLOAT)))
    )))) AS distance
FROM project
WHERE structure->>'$.lat' IS NOT NULL
  AND structure->>'$.lng' IS NOT NULL
ORDER BY distance ASC
LIMIT 5;
"""
|
46 |
+
|
47 |
# Get the OpenAI tools agent prompt
|
48 |
+
# System instructions for the agent: persona/language rules, the database
# schema description imported from init_db, and an example distance query the
# model may adapt.
#
# The message strings below are templates in which "{name}" marks a variable
# (see "{chat_history}", "{input}", "{agent_scratchpad}"). The schema text and
# the SQL example are literal content, so any brace they contain is doubled to
# keep it from being interpreted as a template variable.
_system_message = (
    "You are a helpful assistant. You speak Czech. You can answer questions about real estate projects (novostavby) in Czech Republic. You have access to a database of real estate projects (translate output to Czech too). Some info about the structure of the database: "
    + db_description
    + "Note that some json values can be null so you must sometimes check with IS NOT NULL. For calculating distances you can adapt this query: "
    + sql_distance_query
).replace("{", "{{").replace("}", "}}")

prompt = ChatPromptTemplate.from_messages([
    ("system", _system_message),
    # Prior conversation turns, supplied by the caller.
    ("placeholder", "{chat_history}"),
    ("human", "{input}"),
    # Intermediate tool calls/results filled in by the agent executor.
    ("placeholder", "{agent_scratchpad}"),
])
|
54 |
|
55 |
# Create the agent with OpenAI tools format
|
56 |
agent = create_openai_tools_agent(
|
|
|
85 |
demo = gr.ChatInterface(
|
86 |
fn=chat_with_sql,
|
87 |
title="Estate Chat",
|
88 |
+
description="Zeptej se me na cokoli o novostavbách v ČR 🇨🇿",
|
89 |
examples=[
|
90 |
+
"Projekt s nejnižším vkladem",
|
91 |
+
"Nejlevnejsi byt v Praze",
|
92 |
],
|
93 |
type="messages"
|
94 |
)
|
init_db.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from peewee import *
|
2 |
+
from datetime import datetime
|
3 |
+
from playhouse.sqlite_ext import *
|
4 |
+
|
5 |
+
# Initialize database
|
6 |
+
|
7 |
+
json_structure = "JSON column with hash with keys 'title': str, official title of the project, 'deposit': int, initial deposit (in percentage), 'min_price': int, lowest available property price with VAT. It should not be booked or sold., 'city': str, city of the project, 'lat': float, gps coordinates of the project, 'lng': float, gps coordinates of the project, 'start_year': int, year of construction start, 'end_year': int, estimated year of construction end. Some values can be null/unknown"
|
8 |
+
|
9 |
+
db = SqliteExtDatabase('estate.db', pragmas=(
|
10 |
+
('cache_size', -1024 * 64), # 64MB page-cache.
|
11 |
+
('journal_mode', 'wal'), # Use WAL-mode (you should always use this!).
|
12 |
+
('foreign_keys', 1))) # Enforce foreign-key constraints.
|
13 |
+
# Define the Project model
|
14 |
+
|
15 |
+
db_description = f"""Table "project" - list of real estate projects (novostavby) in Czech Republic
|
16 |
+
url: url of the project.
|
17 |
+
structure: {json_structure}.
|
18 |
+
content: contents of the website.
|
19 |
+
created_at: date and time of creation.
|
20 |
+
"""
|
21 |
+
|
22 |
+
class Project(Model):
    """Peewee model for one real estate project row (table "project")."""

    # URL of the project's website; must be unique across rows.
    url = CharField(unique=True)
    # JSON document with the extracted attributes described by
    # `json_structure` (title, deposit, min_price, city, lat, lng, ...);
    # may be NULL.
    structure = JSONField(null=True)
    # Contents of the website; may be NULL.
    content = TextField(null=True)
    # Creation timestamp; defaults to datetime.now (naive local time).
    created_at = DateTimeField(default=datetime.now)

    class Meta:
        # Bind the model to the module-level SQLite database.
        database = db
|
30 |
+
|
31 |
+
def init_database():
    """Recreate the Project table from scratch and seed it with test URLs.

    WARNING: this is destructive. The existing Project table is dropped
    before being created again, so any stored rows are lost.

    The connection opened here is always closed, even if dropping, creating
    or seeding fails, so a failed run does not leave the database connection
    open.
    """
    print("Initializing database...")
    db.connect()
    try:
        db.drop_tables([Project])
        db.create_tables([Project])
        print("Created tables successfully!")

        # Add some test data if needed
        test_urls = [
            'https://brnojedna.cz/',
        ]

        for url in test_urls:
            # get_or_create keeps seeding idempotent if a URL is listed twice.
            Project.get_or_create(url=url)

        print(f"Number of projects in database: {Project.select().count()}")
    finally:
        db.close()
|
49 |
+
|
50 |
+
if __name__ == "__main__":
|
51 |
+
init_database()
|
requirements.in
CHANGED
@@ -6,5 +6,7 @@ python-dotenv
|
|
6 |
beautifulsoup4
|
7 |
requests
|
8 |
peewee
|
9 |
-
browser_use
|
10 |
openai
|
|
|
|
|
|
|
|
6 |
beautifulsoup4
|
7 |
requests
|
8 |
peewee
|
|
|
9 |
openai
|
10 |
+
crawl4ai
|
11 |
+
datasketch
|
12 |
+
geocoder
|
requirements.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|