reader-1 committed on
Commit
447b18c
·
verified ·
1 Parent(s): ffea178

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. Dockerfile +18 -14
  3. README.md +9 -14
  4. sync_data.sh +112 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1,19 +1,23 @@
1
- FROM python:3.12-bookworm
2
 
3
- RUN apt update && apt install -y git make
4
- RUN useradd -m -u 1000 user
5
- ENV PATH="/home/user/.local/bin:$PATH"
6
 
7
- RUN git clone https://github.com/embeddings-benchmark/mteb.git
8
- RUN chown -R user:user /mteb
9
 
10
- USER user
11
- WORKDIR /mteb
12
 
13
- RUN pip install "pydantic<2.11"
14
- RUN pip install ".[leaderboard]"
15
- # ENV XDG_CACHE_HOME=/home/user/.cache
16
- ENV GRADIO_SERVER_NAME="0.0.0.0"
17
- EXPOSE 7860
18
 
19
- CMD ["make", "run-leaderboard"]
 
 
 
 
 
 
 
 
 
1
# Image for the reader application, extended with the Python and curl tooling
# that /sync_data.sh uses for its WebDAV backups.
FROM hectorqin/reader:latest

# `apk add --no-cache` fetches the package index on the fly and stores nothing
# locally, so neither a preceding `apk update` nor a cache cleanup is needed.
RUN apk add --no-cache \
      python3 python3-dev py3-pip curl libxml2-dev libxslt-dev gcc musl-dev

# Create the data directories and make them writable for every user.
# NOTE(review): presumably needed because the container may not run as root
# at runtime - confirm against the hosting platform.
RUN mkdir -p /logs /storage /file-uploads /data && \
    chmod a+w /logs /storage /file-uploads /data

# Dedicated virtual environment; putting its bin/ first on PATH makes the
# `pip` and `python3` invoked below (and by sync_data.sh) resolve to it.
ENV VIRTUAL_ENV=/opt/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Python packages imported by the snippets embedded in sync_data.sh.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir requests webdavclient3

COPY sync_data.sh /sync_data.sh
RUN chmod +x /sync_data.sh

CMD ["/bin/sh", "/sync_data.sh"]
README.md CHANGED
@@ -1,20 +1,15 @@
1
  ---
2
- title: MTEB Leaderboard
3
- emoji: 🥇
4
  colorFrom: blue
5
- colorTo: indigo
6
  sdk: docker
7
- app_port: 7860
8
- app_file: app.py
9
- pinned: true
10
- tags:
11
- - leaderboard
12
- startup_duration_timeout: 1h
13
- fullWidth: true
14
- license: mit
15
- short_description: Embedding Leaderboard
16
  ---
17
 
18
- # MTEB Leaderboard
 
19
 
20
- Embedding Leaderboard
 
 
 
1
  ---
2
+ title: "1"
3
+ emoji: "🚀"
4
  colorFrom: blue
5
+ colorTo: green
6
  sdk: docker
7
+ app_port: 8080
 
 
 
 
 
 
 
 
8
  ---
9
 
10
+ ### 🚀 一键部署
11
+ [![Deploy with HFSpaceDeploy](https://img.shields.io/badge/Deploy_with-HFSpaceDeploy-green?style=social&logo=rocket)](https://github.com/kfcx/HFSpaceDeploy)
12
 
13
+ 本项目由[HFSpaceDeploy](https://github.com/kfcx/HFSpaceDeploy)一键部署
14
+
15
+ Track, rank and evaluate open LLMs and chatbots
sync_data.sh ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh
#
# Container entrypoint: restores /storage from the newest WebDAV backup,
# keeps uploading fresh backups in the background, and runs reader.jar.
#
# Environment:
#   WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD  required for backups
#   WEBDAV_BACKUP_PATH                            optional sub-path below WEBDAV_URL
#   SYNC_INTERVAL                                 seconds between backups (default 600)

# Without complete WebDAV settings there is nowhere to back up to, so hand the
# process straight over to the application.
if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
  echo "Starting without backup functionality - missing WEBDAV_URL, WEBDAV_USERNAME, or WEBDAV_PASSWORD"
  # exec replaces this shell with the JVM; nothing placed after it can run.
  exec java -jar /app/bin/reader.jar
fi

# Assemble the remote backup location. One slash is trimmed on each side of
# the join so "https://host/dav/" + "/reader" does not yield "dav//reader".
WEBDAV_BACKUP_PATH=${WEBDAV_BACKUP_PATH:-""}
FULL_WEBDAV_URL="${WEBDAV_URL%/}"
if [ -n "$WEBDAV_BACKUP_PATH" ]; then
  FULL_WEBDAV_URL="${FULL_WEBDAV_URL}/${WEBDAV_BACKUP_PATH#/}"
  FULL_WEBDAV_URL="${FULL_WEBDAV_URL%/}"
fi
16
+
17
+ # 下载最新备份并恢复
18
#######################################
# Download the newest reader_backup_*.tar.gz from WebDAV and unpack it into
# /storage. Problems are printed; the caller decides whether they matter.
# Globals:   FULL_WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD (read)
# Outputs:   progress and failure messages on stdout
# Returns:   exit status of the embedded Python program
#######################################
restore_backup() {
  # The settings travel through the environment and the heredoc delimiter is
  # quoted, so the shell never splices them into the Python source: a quote or
  # backslash in the password can no longer break or alter the program.
  FULL_WEBDAV_URL="$FULL_WEBDAV_URL" \
  WEBDAV_USERNAME="$WEBDAV_USERNAME" \
  WEBDAV_PASSWORD="$WEBDAV_PASSWORD" \
    python3 - <<'PY'
import os
import sys
import tarfile

import requests
from webdav3.client import Client

url = os.environ['FULL_WEBDAV_URL']
user = os.environ['WEBDAV_USERNAME']
password = os.environ['WEBDAV_PASSWORD']

client = Client({
    'webdav_hostname': url,
    'webdav_login': user,
    'webdav_password': password,
})
backups = [
    name for name in client.list()
    if name.startswith('reader_backup_') and name.endswith('.tar.gz')
]
if not backups:
    print('No backup files found')
    sys.exit()

# Backup names embed a YYYYmmdd_HHMMSS timestamp, so the lexically greatest
# name is the most recent one.
latest_backup = sorted(backups)[-1]
# The name comes from the server; basename() keeps the download inside /tmp.
archive = os.path.join('/tmp', os.path.basename(latest_backup))

try:
    with requests.get(f'{url}/{latest_backup}', auth=(user, password), stream=True) as r:
        if r.status_code != 200:
            print(f'Failed to download backup: {r.status_code}')
            sys.exit()
        with open(archive, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)

    root = os.path.realpath('/storage')
    with tarfile.open(archive, 'r:gz') as tar:
        # Extract only members that resolve to a path inside /storage, so an
        # archive with absolute or ../ names cannot write elsewhere.
        safe_members = []
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if target == root or target.startswith(root + os.sep):
                safe_members.append(member)
            else:
                print(f'Skipping unsafe archive member: {member.name}')
        tar.extractall(root, members=safe_members)
    print(f'Successfully restored backup from {latest_backup}')
finally:
    # Do not leave the downloaded archive behind in /tmp.
    if os.path.exists(archive):
        os.remove(archive)
PY
}
52
+
53
+ # On first start, download the latest backup and restore it before the application launches.
54
+ echo "Downloading latest backup from WebDAV..."
55
+ restore_backup
56
+
57
+ # 同步函数
58
#######################################
# Back up /storage to WebDAV in an endless loop: archive the directory,
# upload the archive, prune remote backups down to the two newest, then sleep.
# Globals:   FULL_WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD (read)
#            SYNC_INTERVAL (read; seconds between runs, defaults to 600)
# Outputs:   progress and failure messages on stdout
# Returns:   does not return
#######################################
sync_data() {
  SYNC_INTERVAL=${SYNC_INTERVAL:-600}

  while true; do
    echo "Starting sync process at $(date)"

    if [ -d /storage ]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="reader_backup_${timestamp}.tar.gz"

      # Archive the data directory.
      tar -czf "/tmp/${backup_file}" -C /storage .

      # Upload the new backup. --fail makes curl exit non-zero on HTTP error
      # responses (401, 507, ...); without it a rejected upload exits 0 and
      # would be reported as a success.
      if curl --fail -u "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" -T "/tmp/${backup_file}" "$FULL_WEBDAV_URL/${backup_file}"; then
        echo "Successfully uploaded ${backup_file} to WebDAV"
      else
        echo "Failed to upload ${backup_file} to WebDAV"
      fi

      # Prune old remote backups, keeping the two newest. Settings are passed
      # through the environment and the heredoc is quoted so that special
      # characters in the credentials cannot corrupt the Python source.
      FULL_WEBDAV_URL="$FULL_WEBDAV_URL" \
      WEBDAV_USERNAME="$WEBDAV_USERNAME" \
      WEBDAV_PASSWORD="$WEBDAV_PASSWORD" \
        python3 - <<'PY' 2>&1
import os

from webdav3.client import Client

client = Client({
    'webdav_hostname': os.environ['FULL_WEBDAV_URL'],
    'webdav_login': os.environ['WEBDAV_USERNAME'],
    'webdav_password': os.environ['WEBDAV_PASSWORD'],
})
# Timestamped names sort chronologically, so the oldest backups come first.
backups = sorted(
    name for name in client.list()
    if name.startswith('reader_backup_') and name.endswith('.tar.gz')
)
if len(backups) > 2:
    for name in backups[:-2]:
        client.clean(name)
        print(f'Successfully deleted {name}.')
else:
    print('Only {} backups found, no need to clean.'.format(len(backups)))
PY

      rm -f "/tmp/${backup_file}"
    else
      echo "/storage directory does not exist, waiting for next sync..."
    fi

    sleep "$SYNC_INTERVAL"
  done
}
107
+
108
+ # Start the backup loop as a background job so it runs alongside the application.
109
+ sync_data &
110
+
111
+ # Replace this shell with the application's main process.
112
+ exec java -jar /app/bin/reader.jar