Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,9 +6,10 @@ import plotly.graph_objects as go
|
|
6 |
from scipy.stats import norm, t
|
7 |
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
|
8 |
import plotly.figure_factory as ff
|
9 |
-
#from vllm import LLM, SamplingParams
|
10 |
from huggingface_hub import InferenceClient
|
11 |
from sklearn.cluster import KMeans
|
|
|
|
|
12 |
|
13 |
def sorting(df):
|
14 |
df.index = list(map(float, df.index))
|
@@ -544,116 +545,29 @@ def upload_and_select_dataframe():
|
|
544 |
st.sidebar.info("Please upload some files.")
|
545 |
return None
|
546 |
|
|
|
|
|
547 |
|
548 |
-
|
549 |
-
|
550 |
-
def categorize_responses_persian(initial_prompt: str,
|
551 |
-
dataframe: pd.DataFrame,
|
552 |
-
id_column: str,
|
553 |
-
text_column: str,
|
554 |
-
api_key: str,
|
555 |
-
max_retries: int = 3,
|
556 |
-
delay: float = 1.5) -> pd.DataFrame:
|
557 |
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
test_payload = {
|
562 |
-
"model": "deepseek-chat",
|
563 |
-
"messages": [{"role": "user", "content": "سلام"}]
|
564 |
-
}
|
565 |
-
try:
|
566 |
-
response = requests.post(
|
567 |
-
"https://api.deepseek.com/v1/chat/completions",
|
568 |
-
headers={"Authorization": f"Bearer {api_key}"},
|
569 |
-
json=test_payload
|
570 |
-
)
|
571 |
-
response.raise_for_status()
|
572 |
-
return True
|
573 |
-
except Exception as e:
|
574 |
-
print(f"خطای احراز هویت API: {str(e)}")
|
575 |
-
print(f"پاسخ کامل API: {response.text if 'response' in locals() else ''}")
|
576 |
-
return False
|
577 |
-
|
578 |
-
if not validate_api():
|
579 |
-
raise ConnectionError("اتصال به API برقرار نشد. کلید API یا اتصال اینترنت را بررسی کنید.")
|
580 |
-
|
581 |
-
# پردازش اصلی
|
582 |
-
headers = {
|
583 |
-
"Authorization": f"Bearer {api_key}",
|
584 |
-
"Content-Type": "application/json; charset=utf-8"
|
585 |
-
}
|
586 |
-
|
587 |
-
def get_persian_category(answer: str) -> str:
|
588 |
-
messages = [
|
589 |
-
{
|
590 |
-
"role": "system",
|
591 |
-
"content": f"{initial_prompt}\n\nلطفا پاسخ را به یکی از دستههای تعیین شده اختصاص دهید. فقط نام دسته را بازگردانید."
|
592 |
-
},
|
593 |
-
{
|
594 |
-
"role": "user",
|
595 |
-
"content": answer
|
596 |
-
}
|
597 |
-
]
|
598 |
-
|
599 |
-
payload = {
|
600 |
-
"model": "deepseek-chat",
|
601 |
-
"messages": messages,
|
602 |
-
"temperature": 0.3,
|
603 |
-
"max_tokens": 50
|
604 |
-
}
|
605 |
-
|
606 |
-
for attempt in range(max_retries):
|
607 |
-
try:
|
608 |
-
response = requests.post(
|
609 |
-
"https://api.deepseek.com/v1/chat/completions",
|
610 |
-
headers=headers,
|
611 |
-
json=payload,
|
612 |
-
timeout=10
|
613 |
-
)
|
614 |
-
|
615 |
-
# دیباگ پاسخ خام
|
616 |
-
print(f"\nوضعیت HTTP: {response.status_code}")
|
617 |
-
print("هدرهای پاسخ:")
|
618 |
-
print(json.dumps(dict(response.headers), indent=2))
|
619 |
-
print("بدنه پاسخ:")
|
620 |
-
print(response.text)
|
621 |
-
|
622 |
-
response.raise_for_status()
|
623 |
-
|
624 |
-
result = response.json()
|
625 |
-
return result['choices'][0]['message']['content'].strip()
|
626 |
-
|
627 |
-
except requests.exceptions.HTTPError as err:
|
628 |
-
print(f"\nخطای HTTP: {err}")
|
629 |
-
print(f"جزئیات خطا: {response.text}")
|
630 |
-
if response.status_code == 401:
|
631 |
-
raise PermissionError("کلید API نامعتبر است")
|
632 |
-
time.sleep(2 ** attempt) # Backoff نمایی
|
633 |
-
|
634 |
-
except Exception as e:
|
635 |
-
print(f"\nخطای عمومی: {str(e)}")
|
636 |
-
time.sleep(1)
|
637 |
|
638 |
-
|
639 |
|
640 |
-
|
641 |
-
|
642 |
-
dataframe['category'] = dataframe[text_column].progress_apply(get_persian_category)
|
643 |
-
|
644 |
-
return dataframe
|
645 |
-
empty_col1, main_col, empty_col2 = st.columns([1.6, 2.8, 1.6])
|
646 |
|
647 |
-
with
|
648 |
-
|
649 |
|
650 |
-
with
|
651 |
-
st.
|
652 |
|
653 |
-
|
654 |
-
st.title("Chortke")
|
655 |
|
656 |
-
st.
|
657 |
|
658 |
df = upload_and_select_dataframe()
|
659 |
|
@@ -664,7 +578,7 @@ try:
|
|
664 |
cols = edit_strings(df.columns)
|
665 |
cols = sorted(list(set(cols)))
|
666 |
|
667 |
-
main_option = st.selectbox("Please select an option:", ["Tabulation", "Funnel Analysis", "Segmentation Analysis", "Hypothesis test", "Machine Learning", "Coding", "AI Chat"])
|
668 |
|
669 |
if main_option == "Tabulation":
|
670 |
st.header("Tabulation Analysis")
|
@@ -1040,5 +954,16 @@ try:
|
|
1040 |
for chunk in stream:
|
1041 |
st.warning(chunk.choices[0].delta.content, end="")
|
1042 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1043 |
except Exception as e:
|
1044 |
st.error(f"❌ Error: {e}")
|
|
|
6 |
from scipy.stats import norm, t
|
7 |
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
|
8 |
import plotly.figure_factory as ff
|
|
|
9 |
from huggingface_hub import InferenceClient
|
10 |
from sklearn.cluster import KMeans
|
11 |
+
import json
|
12 |
+
import math
|
13 |
|
14 |
def sorting(df):
|
15 |
df.index = list(map(float, df.index))
|
|
|
545 |
st.sidebar.info("Please upload some files.")
|
546 |
return None
|
547 |
|
548 |
+
def sample_size_calculator(confidence_level, p, E):
    """Return the minimum sample size needed to estimate a proportion.

    Applies n = Z**2 * p * (1 - p) / E**2, where Z is the two-sided
    standard-normal critical value for ``confidence_level``, and rounds
    the result up to the next whole respondent.

    Args:
        confidence_level: Desired confidence as a fraction strictly
            between 0 and 1 (e.g. 0.95 for 95%).
        p: Estimated probability of success, between 0 and 1 inclusive.
        E: Margin of error as a fraction; must be greater than 0.

    Returns:
        The required sample size as an int.

    Raises:
        ValueError: If any argument is outside its valid range.
    """
    # The chained comparisons are False for NaN, so NaN inputs are rejected
    # here instead of surfacing later as an opaque math.ceil(nan) failure.
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1 (e.g. 0.95)")
    if not 0 <= p <= 1:
        raise ValueError("p must be between 0 and 1")
    # Guard the division below: E == 0 would raise a bare ZeroDivisionError.
    if not E > 0:
        raise ValueError("E (margin of error) must be greater than 0")

    # Two-sided critical value: split the remaining probability over both tails.
    Z = norm.ppf(1 - (1 - confidence_level) / 2)

    n = (Z**2 * p * (1 - p)) / (E**2)

    # A fractional respondent is not possible, so always round up.
    return math.ceil(n)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
556 |
|
557 |
+
#empty_col1, main_col, empty_col2 = st.columns([1.6, 2.8, 1.6])
|
558 |
|
559 |
+
#with main_col:
|
560 |
+
# col1, col2 = st.columns([2, 3])
|
|
|
|
|
|
|
|
|
561 |
|
562 |
+
# with col1:
|
563 |
+
# st.image("logo.png", width=400)
|
564 |
|
565 |
+
# with col2:
|
566 |
+
# st.title("Insightzen")
|
567 |
|
568 |
+
#st.markdown('[Click to register a suggestion or comment](https://docs.google.com/forms/d/e/1FAIpQLScLyP7bBbqMfGdspjL7Ij64UZ6v2KjqjKNbm8gwEsgWsFs_Qg/viewform?usp=header)')
|
|
|
569 |
|
570 |
+
st.image("logo.png", width=600)
|
571 |
|
572 |
df = upload_and_select_dataframe()
|
573 |
|
|
|
578 |
cols = edit_strings(df.columns)
|
579 |
cols = sorted(list(set(cols)))
|
580 |
|
581 |
+
main_option = st.selectbox("Please select an option:", ["Tabulation", "Funnel Analysis", "Segmentation Analysis", "Hypothesis test", "Machine Learning", "Sample Size Calculator" ,"Coding", "AI Chat"])
|
582 |
|
583 |
if main_option == "Tabulation":
|
584 |
st.header("Tabulation Analysis")
|
|
|
954 |
for chunk in stream:
|
955 |
st.warning(chunk.choices[0].delta.content, end="")
|
956 |
|
957 |
+
elif main_option == "Sample Size Calculator":
|
958 |
+
st.header("Sample Size Calculator")
|
959 |
+
|
960 |
+
confidence_level = int(st.text_input("Confidence levels:"))
|
961 |
+
p = int(st.text_input("Estimated probability of success:"))
|
962 |
+
E = int(st.text_input("Margin of error:"))
|
963 |
+
|
964 |
+
n = sample_size_calculator(confidence_level, p, E)
|
965 |
+
|
966 |
+
st.write(f"Sample size: {n}")
|
967 |
+
|
968 |
except Exception as e:
|
969 |
st.error(f"❌ Error: {e}")
|