carbonnnnn committed on
Commit
3f2d847
·
1 Parent(s): dc57ae7

update languages

Browse files
Files changed (2) hide show
  1. src/process_data.py +30 -10
  2. tempapp.py +0 -33
src/process_data.py CHANGED
@@ -1,6 +1,7 @@
1
  import pandas as pd
2
  import json
3
  import os
 
4
 
5
  from src.collect_data import fetch_version_metadata, fetch_registry_data
6
  from assets.text_content import LANG_MAPPING
@@ -25,18 +26,37 @@ def clean_price(price):
25
  def map_languages(languages):
26
  if isinstance(languages, float) and pd.isna(languages):
27
  return None
28
- # If it's already a list
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  if isinstance(languages, list):
30
- return ', '.join([LANG_MAPPING.get(str(lang), str(lang)) for lang in languages])
31
- # If it's a string
32
- if isinstance(languages, str):
33
- return ', '.join([LANG_MAPPING.get(lang.strip(), lang.strip()) for lang in languages.split(',')])
34
- # If it's an array or any other type
35
- try:
36
- return ', '.join([str(lang) for lang in languages])
37
- except:
38
- return str(languages)
39
 
 
 
 
40
  # Extract multimodality fields
41
  def get_multimodality_field(model_data, field):
42
  try:
 
1
  import pandas as pd
2
  import json
3
  import os
4
+ import pycountry
5
 
6
  from src.collect_data import fetch_version_metadata, fetch_registry_data
7
  from assets.text_content import LANG_MAPPING
 
26
  def map_languages(languages):
27
  if isinstance(languages, float) and pd.isna(languages):
28
  return None
29
+
30
+ def get_language_name(lang):
31
+ # Clean and standardize the language code
32
+ lang = str(lang).strip().lower()
33
+
34
+ # Try to find the language
35
+ try:
36
+ # First try as language code (en, fr, etc.)
37
+ language = pycountry.languages.get(alpha_2=lang)
38
+ if not language:
39
+ # Try as language name (English, French, etc.)
40
+ language = pycountry.languages.get(name=lang.capitalize())
41
+
42
+ return language.name if language else lang
43
+ except (AttributeError, LookupError):
44
+ return lang
45
+
46
+ # Handle different input types
47
  if isinstance(languages, list):
48
+ lang_list = languages
49
+ elif isinstance(languages, str):
50
+ lang_list = [l.strip() for l in languages.split(',')]
51
+ else:
52
+ try:
53
+ lang_list = list(languages)
54
+ except:
55
+ return str(languages)
 
56
 
57
+ # Map all languages and join them
58
+ return ', '.join(get_language_name(lang) for lang in lang_list)
59
+
60
  # Extract multimodality fields
61
  def get_multimodality_field(model_data, field):
62
  try:
tempapp.py DELETED
@@ -1,33 +0,0 @@
1
- import gradio as gr
2
- from date_rangeslider import RangeSlider
3
- from pathlib import Path
4
-
5
- text = "## The selected date range is: {min} to {max}"
6
-
7
- with gr.Blocks() as demo:
8
- with gr.Tabs():
9
- with gr.Tab("Demo"):
10
- gr.Markdown("""## 🛝 Date RangeSlider
11
-
12
- ## Drag either end and see the selected date range update in real-time.
13
- """)
14
- range_slider = RangeSlider(
15
- minimum="2023-01-01",
16
- maximum="2024-12-31",
17
- value=("2023-01-01", "2024-12-31")
18
- )
19
- range_ = gr.Markdown(value=text.format(min="2023-01-01", max="2024-12-31"))
20
- range_slider.change(
21
- lambda s: text.format(min=s[0], max=s[1]),
22
- range_slider,
23
- range_,
24
- show_progress="hide",
25
- trigger_mode="always_last"
26
- )
27
- gr.Examples([
28
- ("2023-03-01", "2023-06-30"),
29
- ("2023-07-01", "2023-12-31")
30
- ], inputs=[range_slider])
31
-
32
- if __name__ == "__main__":
33
- demo.launch()