Spaces:
Build error
Build error
Upload 12 files
Browse files- .gitattributes +1 -0
- .gitignore +66 -0
- .replit +38 -0
- CONTRIBUTING.md +75 -0
- README.md +94 -12
- css.py +80 -0
- cultural_utils.py +43 -0
- generated-icon.png +3 -0
- pyproject.toml +24 -0
- replit.nix +10 -0
- styles.css +81 -0
- utils.py +94 -0
- uv.lock +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
generated-icon.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
*.so
|
6 |
+
.Python
|
7 |
+
build/
|
8 |
+
develop-eggs/
|
9 |
+
dist/
|
10 |
+
downloads/
|
11 |
+
eggs/
|
12 |
+
.eggs/
|
13 |
+
lib/
|
14 |
+
lib64/
|
15 |
+
parts/
|
16 |
+
sdist/
|
17 |
+
var/
|
18 |
+
wheels/
|
19 |
+
*.egg-info/
|
20 |
+
.installed.cfg
|
21 |
+
*.egg
|
22 |
+
|
23 |
+
# Virtual Environment
|
24 |
+
venv/
|
25 |
+
ENV/
|
26 |
+
env/
|
27 |
+
.env
|
28 |
+
|
29 |
+
# IDE
|
30 |
+
.idea/
|
31 |
+
.vscode/
|
32 |
+
*.swp
|
33 |
+
*.swo
|
34 |
+
.project
|
35 |
+
.pydevproject
|
36 |
+
|
37 |
+
# Logs
|
38 |
+
*.log
|
39 |
+
logs/
|
40 |
+
log/
|
41 |
+
|
42 |
+
# Testing
|
43 |
+
.coverage
|
44 |
+
htmlcov/
|
45 |
+
.pytest_cache/
|
46 |
+
.tox/
|
47 |
+
|
48 |
+
# Distribution
|
49 |
+
*.tar.gz
|
50 |
+
*.zip
|
51 |
+
|
52 |
+
# Replit specific
|
53 |
+
.replit
|
54 |
+
replit.nix
|
55 |
+
.breakpoints
|
56 |
+
.upm/
|
57 |
+
|
58 |
+
# Model files
|
59 |
+
*.pt
|
60 |
+
*.pth
|
61 |
+
*.bin
|
62 |
+
*.onnx
|
63 |
+
|
64 |
+
# Other
|
65 |
+
.DS_Store
|
66 |
+
Thumbs.db
|
.replit
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
modules = ["python-3.11", "python3"]
|
2 |
+
|
3 |
+
[nix]
|
4 |
+
channel = "stable-24_05"
|
5 |
+
|
6 |
+
[workflows]
|
7 |
+
runButton = "Project"
|
8 |
+
|
9 |
+
[[workflows.workflow]]
|
10 |
+
name = "Project"
|
11 |
+
mode = "parallel"
|
12 |
+
author = "agent"
|
13 |
+
|
14 |
+
[[workflows.workflow.tasks]]
|
15 |
+
task = "workflow.run"
|
16 |
+
args = "Translation App"
|
17 |
+
|
18 |
+
[[workflows.workflow]]
|
19 |
+
name = "Translation App"
|
20 |
+
author = "agent"
|
21 |
+
|
22 |
+
[workflows.workflow.metadata]
|
23 |
+
agentRequireRestartOnSave = false
|
24 |
+
|
25 |
+
[[workflows.workflow.tasks]]
|
26 |
+
task = "packager.installForAll"
|
27 |
+
|
28 |
+
[[workflows.workflow.tasks]]
|
29 |
+
task = "shell.exec"
|
30 |
+
args = "python app.py"
|
31 |
+
waitForPort = 8000
|
32 |
+
|
33 |
+
[deployment]
|
34 |
+
run = ["sh", "-c", "python app.py"]
|
35 |
+
|
36 |
+
[[ports]]
|
37 |
+
localPort = 8000
|
38 |
+
externalPort = 80
|
CONTRIBUTING.md
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Contributing to English-Farsi Translation Interface
|
2 |
+
|
3 |
+
Thank you for your interest in contributing to our project! This document provides guidelines and best practices for contributions.
|
4 |
+
|
5 |
+
## Code of Conduct
|
6 |
+
|
7 |
+
By participating in this project, you agree to maintain a respectful and inclusive environment for all contributors.
|
8 |
+
|
9 |
+
## Getting Started
|
10 |
+
|
11 |
+
1. Fork the repository
|
12 |
+
2. Create a new branch for your feature/fix
|
13 |
+
3. Write clean, documented code
|
14 |
+
4. Submit a pull request
|
15 |
+
|
16 |
+
## Development Guidelines
|
17 |
+
|
18 |
+
### Code Style
|
19 |
+
|
20 |
+
- Follow PEP 8 style guide for Python code
|
21 |
+
- Use meaningful variable and function names
|
22 |
+
- Add docstrings to functions and classes
|
23 |
+
- Keep functions focused and single-purpose
|
24 |
+
- Include type hints where applicable
|
25 |
+
|
26 |
+
### Testing
|
27 |
+
|
28 |
+
- Write unit tests for new features
|
29 |
+
- Ensure all tests pass before submitting PR
|
30 |
+
- Add integration tests for complex features
|
31 |
+
|
32 |
+
### Documentation
|
33 |
+
|
34 |
+
- Update README.md if adding new features
|
35 |
+
- Document API changes
|
36 |
+
- Include docstrings for new functions/classes
|
37 |
+
- Add comments for complex logic
|
38 |
+
|
39 |
+
### Commit Messages
|
40 |
+
|
41 |
+
- Use clear, descriptive commit messages
|
42 |
+
- Start with a verb (Add, Fix, Update, etc.)
|
43 |
+
- Keep messages concise but informative
|
44 |
+
|
45 |
+
Example:
|
46 |
+
```
|
47 |
+
Add text preprocessing for special characters
|
48 |
+
```
|
49 |
+
|
50 |
+
### Pull Request Process
|
51 |
+
|
52 |
+
1. Update documentation
|
53 |
+
2. Add/update tests
|
54 |
+
3. Ensure CI passes
|
55 |
+
4. Request review from maintainers
|
56 |
+
5. Address review feedback
|
57 |
+
|
58 |
+
## Feature Requests
|
59 |
+
|
60 |
+
- Use issue tracker for feature requests
|
61 |
+
- Clearly describe the feature and its benefits
|
62 |
+
- Include use cases where applicable
|
63 |
+
|
64 |
+
## Bug Reports
|
65 |
+
|
66 |
+
Include:
|
67 |
+
- Clear description of the issue
|
68 |
+
- Steps to reproduce
|
69 |
+
- Expected vs actual behavior
|
70 |
+
- System information
|
71 |
+
- Screenshots if applicable
|
72 |
+
|
73 |
+
## Questions?
|
74 |
+
|
75 |
+
Feel free to open an issue for any questions about contributing!
|
README.md
CHANGED
@@ -1,12 +1,94 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# English-Farsi Translation Interface 🌐
|
3 |
+
|
4 |
+
A sophisticated translation interface that provides culturally-sensitive translations between English and Farsi, powered by machine learning and enhanced with cultural context annotations.
|
5 |
+
|
6 |
+
## ✨ Features
|
7 |
+
|
8 |
+
- **Bidirectional Translation**: Seamless translation between English and Farsi
|
9 |
+
- **Cultural Context**: Provides explanations for idioms and cultural expressions
|
10 |
+
- **User-Friendly Interface**: Clean, intuitive Gradio-based web interface
|
11 |
+
- **Real-time Translation**: Instant translation with cultural annotations
|
12 |
+
- **RTL Support**: Full support for right-to-left text in Farsi
|
13 |
+
|
14 |
+
## 🚀 Quick Start
|
15 |
+
|
16 |
+
1. Clone the repository:
|
17 |
+
```bash
|
18 |
+
git clone https://github.com/yourusername/english-farsi-translator.git
|
19 |
+
cd english-farsi-translator
|
20 |
+
```
|
21 |
+
|
22 |
+
2. Install dependencies:
|
23 |
+
```bash
|
24 |
+
pip install -r requirements.txt
|
25 |
+
```
|
26 |
+
|
27 |
+
3. Run the application:
|
28 |
+
```bash
|
29 |
+
python app.py
|
30 |
+
```
|
31 |
+
|
32 |
+
The application listens on port 8000; open `http://localhost:8000` in your browser.
|
33 |
+
|
34 |
+
## 🛠️ Project Structure
|
35 |
+
|
36 |
+
```
|
37 |
+
├── app.py # Main application file
|
38 |
+
├── utils.py # Utility functions
|
39 |
+
├── cultural_utils.py # Cultural context handling
|
40 |
+
├── css.py # CSS styles for Gradio interface
|
41 |
+
├── styles.css # Additional CSS styles
|
42 |
+
├── docs/ # Documentation
|
43 |
+
├── tests/ # Test files
|
44 |
+
└── requirements.txt # Project dependencies
|
45 |
+
```
|
46 |
+
|
47 |
+
## 💡 Usage
|
48 |
+
|
49 |
+
1. Select source and target languages from the dropdown menus
|
50 |
+
2. Enter text in the input box
|
51 |
+
3. Click "Translate" to get the translation
|
52 |
+
4. View cultural context annotations below the translation
|
53 |
+
|
54 |
+
## 🔍 Features in Detail
|
55 |
+
|
56 |
+
- **Text Preprocessing**: Handles special characters and formatting
|
57 |
+
- **Cultural Context Detection**: Identifies and explains cultural idioms
|
58 |
+
- **Language Detection**: Automatic detection of input language
|
59 |
+
- **Error Handling**: Robust error management with helpful messages
|
60 |
+
- **Responsive Design**: Works on both desktop and mobile devices
|
61 |
+
|
62 |
+
## 🤝 Contributing
|
63 |
+
|
64 |
+
We welcome contributions! Please see our [Contributing Guidelines](CONTRIBUTING.md) for details on:
|
65 |
+
- Code style
|
66 |
+
- Development process
|
67 |
+
- Pull request process
|
68 |
+
- Testing requirements
|
69 |
+
|
70 |
+
## 📝 Documentation
|
71 |
+
|
72 |
+
For detailed information about the API and installation process, check:
|
73 |
+
- [API Documentation](docs/API.md)
|
74 |
+
- [Installation Guide](docs/INSTALLATION.md)
|
75 |
+
|
76 |
+
## ⚙️ Technical Requirements
|
77 |
+
|
78 |
+
- Python 3.11+
|
79 |
+
- Required packages:
|
80 |
+
- transformers
|
81 |
+
- gradio
|
82 |
+
- torch
|
83 |
+
- sentencepiece
|
84 |
+
- protobuf
|
85 |
+
|
86 |
+
## 🔒 License
|
87 |
+
|
88 |
+
This project is licensed under the MIT License.
|
89 |
+
|
90 |
+
## 🙏 Acknowledgments
|
91 |
+
|
92 |
+
- Persian NLP community for the translation model
|
93 |
+
- Contributors and maintainers
|
94 |
+
- Gradio team for the interface framework
|
css.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Custom CSS for the Gradio interface.
#
# The @import rules come first on purpose: CSS only honours @import when it
# precedes every other rule in the stylesheet (apart from @charset and
# @layer statements). An @import placed after a style rule is ignored, so
# the web fonts named in `font-family` below would never be fetched.
#
# NOTE(review): the Vazirmatn URL contains "[email protected]", which looks
# like an e-mail-obfuscation artifact standing in for a "name@version" path
# segment -- confirm the real URL before shipping.
custom_css = """
/* Font imports */
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans:wght@400;700&display=swap');
@import url('https://cdn.jsdelivr.net/gh/rastikerdar/[email protected]/Vazirmatn-font-face.css');

.gradio-container {
    font-family: 'Noto Sans', 'Vazirmatn', sans-serif;
    background-color: #F7F7F7;
    color: #333333;
}

.primary-btn {
    background-color: #2D8EFF !important;
    color: white !important;
    border: none !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
}

.secondary-btn {
    background-color: #34B233 !important;
    color: white !important;
}

.error-text {
    color: #FF6B6B !important;
}

/* Input/Output containers */
.input-container, .output-container {
    padding: 20px;
    background: white;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

/* RTL Support */
[dir="rtl"] {
    text-align: right;
}

/* Responsive Design */
@media (max-width: 768px) {
    .gradio-container {
        padding: 10px;
    }

    .input-container, .output-container {
        padding: 15px;
    }
}

/* Loading State */
.loading {
    border: 2px solid #2D8EFF;
    border-radius: 50%;
    border-top: 2px solid transparent;
    animation: spin 1s linear infinite;
}

@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* Custom Scrollbar */
::-webkit-scrollbar {
    width: 8px;
}

::-webkit-scrollbar-track {
    background: #f1f1f1;
}

::-webkit-scrollbar-thumb {
    background: #2D8EFF;
    border-radius: 4px;
}
"""
|
cultural_utils.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Utility module for managing cultural context annotations."""
|
2 |
+
from typing import Dict, Tuple, List
|
3 |
+
|
4 |
+
# Initial database of idioms and their cultural context
# Format: {idiom: (literal_translation, cultural_explanation)}
ENGLISH_IDIOMS: Dict[str, Tuple[str, str]] = {
    "break the ice": (
        "شکستن یخ",
        "To initiate social interaction and reduce tension. In Persian culture, this concept is similar to 'گرم گرفتن' (warm taking) which emphasizes creating a warm, friendly atmosphere.",
    ),
    "costs an arm and a leg": (
        "به قیمت یک دست و پا",
        "Very expensive. In Persian, a similar expression is 'سر به فلک کشیدن' (reaching the sky) to describe extremely high prices.",
    ),
    "piece of cake": (
        "تکه کیک",
        "Something very easy to do. In Persian culture, the equivalent idiom is 'آب خوردن' (like drinking water) to describe a task that's very simple.",
    ),
}

PERSIAN_IDIOMS: Dict[str, Tuple[str, str]] = {
    "آب خوردن": (
        "drinking water",
        "Used to describe something very easy, similar to the English 'piece of cake'.",
    ),
    "دست و پنجه نرم کردن": (
        "softening hand and fingers",
        "To struggle or deal with something difficult, similar to 'wrestling with' in English.",
    ),
    "دیوار موش داره موش هم گوش داره": (
        "the wall has mice and mice have ears",
        "Be careful what you say as others might be listening, similar to 'walls have ears' in English.",
    ),
}

# Spellings of ``source_lang`` (compared case-insensitively) that select the
# English table. Both the short code and the full name are accepted so a
# caller can pass either one; every other value selects the Persian table.
_ENGLISH_LANG_NAMES = frozenset({"en", "english"})


def detect_idioms(text: str, source_lang: str) -> List[Tuple[str, str, str]]:
    """
    Detect idioms in the input text and return their cultural context.

    Matching is a case-insensitive substring search of every known idiom
    against ``text``.

    Args:
        text: Text to scan. Empty or ``None`` yields an empty list.
        source_lang: Language of ``text``. "en" or "English" (any casing,
            surrounding whitespace ignored) selects ENGLISH_IDIOMS; any
            other value selects PERSIAN_IDIOMS.

    Returns:
        List of tuples (idiom, literal_translation, cultural_explanation),
        in the order the idioms appear in the selected table.
    """
    if not text:
        return []

    lang = (source_lang or "").strip().lower()
    idioms_db = ENGLISH_IDIOMS if lang in _ENGLISH_LANG_NAMES else PERSIAN_IDIOMS

    # Lower-case the text once instead of once per idiom.
    haystack = text.lower()

    return [
        (idiom, literal, explanation)
        for idiom, (literal, explanation) in idioms_db.items()
        if idiom.lower() in haystack
    ]
|
33 |
+
|
34 |
+
def get_cultural_context(text: str, source_lang: str) -> Dict[str, List[Tuple[str, str, str]]]:
    """
    Collect the cultural context annotations available for a text.

    Args:
        text: Text to annotate.
        source_lang: Language of ``text``; forwarded unchanged to
            ``detect_idioms``.

    Returns:
        Dictionary with a single 'idioms' key holding the list of
        (idiom, literal_translation, cultural_explanation) tuples that
        ``detect_idioms`` produced for the text.
    """
    detected = detect_idioms(text, source_lang)
    annotations: Dict[str, List[Tuple[str, str, str]]] = {}
    annotations['idioms'] = detected
    return annotations
|
generated-icon.png
ADDED
![]() |
Git LFS Details
|
pyproject.toml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "repl-nix-workspace"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Add your description here"
|
5 |
+
requires-python = ">=3.11"
|
6 |
+
dependencies = [
|
7 |
+
"blobfile>=3.0.0",
|
8 |
+
"css>=0.1",
|
9 |
+
"gradio>=5.15.0",
|
10 |
+
"protobuf>=5.29.3",
|
11 |
+
"sentencepiece>=0.2.0",
|
12 |
+
"tiktoken>=0.8.0",
|
13 |
+
"torch>=2.6.0",
|
14 |
+
"transformers>=4.48.3",
|
15 |
+
]
|
16 |
+
|
17 |
+
[[tool.uv.index]]
|
18 |
+
explicit = true
|
19 |
+
name = "pytorch-cpu"
|
20 |
+
url = "https://download.pytorch.org/whl/cpu"
|
21 |
+
|
22 |
+
[tool.uv.sources]
|
23 |
+
torch = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }]
|
24 |
+
torchvision = [{ index = "pytorch-cpu", marker = "platform_system == 'Linux'" }]
|
replit.nix
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{pkgs}: {
|
2 |
+
deps = [
|
3 |
+
pkgs.pkg-config
|
4 |
+
pkgs.rustc
|
5 |
+
pkgs.libiconv
|
6 |
+
pkgs.cargo
|
7 |
+
pkgs.protobuf
|
8 |
+
pkgs.ffmpeg-full
|
9 |
+
];
|
10 |
+
}
|
styles.css
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
/* Font imports */
|
3 |
+
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans:wght@400;700&display=swap');
|
4 |
+
@import url('https://cdn.jsdelivr.net/gh/rastikerdar/[email protected]/Vazirmatn-font-face.css');
|
5 |
+
|
6 |
+
/* Base styles */
|
7 |
+
.gradio-container {
|
8 |
+
font-family: 'Noto Sans', 'Vazirmatn', sans-serif;
|
9 |
+
background-color: #F7F7F7;
|
10 |
+
color: #333333;
|
11 |
+
}
|
12 |
+
|
13 |
+
/* Button styles */
|
14 |
+
.primary-btn {
|
15 |
+
background-color: #2D8EFF !important;
|
16 |
+
color: white !important;
|
17 |
+
border: none !important;
|
18 |
+
padding: 10px 20px !important;
|
19 |
+
border-radius: 5px !important;
|
20 |
+
}
|
21 |
+
|
22 |
+
.secondary-btn {
|
23 |
+
background-color: #34B233 !important;
|
24 |
+
color: white !important;
|
25 |
+
}
|
26 |
+
|
27 |
+
/* Container styles */
|
28 |
+
.input-container, .output-container {
|
29 |
+
padding: 20px;
|
30 |
+
background: white;
|
31 |
+
border-radius: 8px;
|
32 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
33 |
+
}
|
34 |
+
|
35 |
+
/* RTL Support */
|
36 |
+
[dir="rtl"] {
|
37 |
+
text-align: right;
|
38 |
+
}
|
39 |
+
|
40 |
+
/* Status indicators */
|
41 |
+
.error-text {
|
42 |
+
color: #FF6B6B !important;
|
43 |
+
}
|
44 |
+
|
45 |
+
.loading {
|
46 |
+
border: 2px solid #2D8EFF;
|
47 |
+
border-radius: 50%;
|
48 |
+
border-top: 2px solid transparent;
|
49 |
+
animation: spin 1s linear infinite;
|
50 |
+
}
|
51 |
+
|
52 |
+
/* Animations */
|
53 |
+
@keyframes spin {
|
54 |
+
0% { transform: rotate(0deg); }
|
55 |
+
100% { transform: rotate(360deg); }
|
56 |
+
}
|
57 |
+
|
58 |
+
/* Responsive Design */
|
59 |
+
@media (max-width: 768px) {
|
60 |
+
.gradio-container {
|
61 |
+
padding: 10px;
|
62 |
+
}
|
63 |
+
|
64 |
+
.input-container, .output-container {
|
65 |
+
padding: 15px;
|
66 |
+
}
|
67 |
+
}
|
68 |
+
|
69 |
+
/* Custom Scrollbar */
|
70 |
+
::-webkit-scrollbar {
|
71 |
+
width: 8px;
|
72 |
+
}
|
73 |
+
|
74 |
+
::-webkit-scrollbar-track {
|
75 |
+
background: #f1f1f1;
|
76 |
+
}
|
77 |
+
|
78 |
+
::-webkit-scrollbar-thumb {
|
79 |
+
background: #2D8EFF;
|
80 |
+
border-radius: 4px;
|
81 |
+
}
|
utils.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import re
|
3 |
+
|
4 |
+
class TextProcessor:
    """Handles text processing operations for translation."""

    # Maximum number of characters accepted by validate_input and kept by
    # preprocess_text.
    MAX_LENGTH = 512
    PERSIAN_NUMBERS = {
        '0': '۰', '1': '۱', '2': '۲', '3': '۳', '4': '۴',
        '5': '۵', '6': '۶', '7': '۷', '8': '۸', '9': '۹'
    }

    # Everything preprocess_text removes: any character that is not a word
    # character, whitespace, basic punctuation, an apostrophe (' or U+2019),
    # the zero-width non-joiner (U+200C, which separates word parts in
    # Persian spelling) or Persian punctuation (U+060C comma, U+061B
    # semicolon, U+061F question mark).
    _DISALLOWED_CHARS = re.compile(r"[^\w\s.,!?'\u2019\u200c\u060c\u061b\u061f-]")
    _SPACE_BEFORE_PUNCT = re.compile(r'\s+([.,!?])')
    _ARABIC_SCRIPT_CHAR = re.compile(r'[\u0600-\u06FF]')
    _LATIN_LETTER = re.compile(r'[a-zA-Z]')
    # Special tokens a model may leave in its decoded output.
    _MODEL_ARTIFACTS = ("<pad>", "</s>", "<s>")
    # One-pass translation table built from PERSIAN_NUMBERS.
    _DIGIT_TABLE = str.maketrans(PERSIAN_NUMBERS)

    @staticmethod
    def preprocess_text(text: str) -> str:
        """
        Clean and prepare text for translation.

        Unsupported characters are removed first and whitespace is collapsed
        afterwards, so a removal never leaves a double space behind.
        Apostrophes, the zero-width non-joiner and Persian punctuation are
        kept because dropping them changes the words themselves.

        Args:
            text: Input text to process

        Returns:
            Processed text ready for translation, at most MAX_LENGTH
            characters long; "" for empty input
        """
        if not text:
            return ""

        cleaned = TextProcessor._DISALLOWED_CHARS.sub('', text)
        cleaned = ' '.join(cleaned.split())

        return cleaned[:TextProcessor.MAX_LENGTH]

    @staticmethod
    def postprocess_translation(text: str) -> str:
        """
        Clean up translated text and normalize numbers.

        ASCII digits are rewritten as Persian digits unless the text is
        clearly not Persian, i.e. it contains Latin letters and no
        Arabic-script character at all. English output therefore keeps its
        digits.

        Args:
            text: Translated text to process

        Returns:
            Cleaned and normalized text; "" for empty input
        """
        if not text:
            return ""

        # Clean up model artifacts
        for token in TextProcessor._MODEL_ARTIFACTS:
            text = text.replace(token, "")
        text = TextProcessor._SPACE_BEFORE_PUNCT.sub(r'\1', text)
        text = ' '.join(text.split())

        # Convert to Persian numbers, except in purely Latin-script output
        has_arabic_script = TextProcessor._ARABIC_SCRIPT_CHAR.search(text) is not None
        has_latin = TextProcessor._LATIN_LETTER.search(text) is not None
        if has_arabic_script or not has_latin:
            text = text.translate(TextProcessor._DIGIT_TABLE)

        return text.strip()

    @staticmethod
    def detect_language(text: str) -> str:
        """
        Detect if text is primarily English or Farsi.

        Args:
            text: Input text to analyze

        Returns:
            'Farsi' when characters in U+0600-U+06FF strictly outnumber
            ASCII letters, otherwise 'English' (ties and text with neither
            kind of character count as 'English')
        """
        farsi_chars = len(TextProcessor._ARABIC_SCRIPT_CHAR.findall(text))
        english_chars = len(TextProcessor._LATIN_LETTER.findall(text))
        return "Farsi" if farsi_chars > english_chars else "English"

    @staticmethod
    def validate_input(text: str) -> tuple[bool, str]:
        """
        Validate input text length and content.

        Args:
            text: Input text to validate

        Returns:
            Tuple of (is_valid, error_message); error_message is "" when
            the input is valid
        """
        if not text or not text.strip():
            return False, "Please enter text to translate"
        if len(text) > TextProcessor.MAX_LENGTH:
            return False, f"Input text is too long (maximum {TextProcessor.MAX_LENGTH} characters)"
        return True, ""


# Expose static methods for backward compatibility
preprocess_text = TextProcessor.preprocess_text
postprocess_translation = TextProcessor.postprocess_translation
detect_language = TextProcessor.detect_language
validate_input = TextProcessor.validate_input
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|