vdwow committed
Commit 10e4a4c · 1 Parent(s): faf13f8

feat: light mode + token counter
.python-version DELETED
@@ -1 +0,0 @@
- 3.12

.streamlit/config.toml ADDED
@@ -0,0 +1,4 @@
+ [theme]
+ base="light"
+ textColor="#0B3B36"
+ primaryColor = "#00BF63"

app.py CHANGED
@@ -12,16 +12,30 @@ from src.content import (
 
  from src.expert import expert_mode
  from src.calculator import calculator_mode
+ from src.token_estimator import token_estimator
 
- st.set_page_config(layout="wide",
-                    page_title="ECOLOGITS",
-                    page_icon='💬')
+ st.set_page_config(
+     layout="wide",
+     page_title="ECOLOGITS",
+     page_icon='💬'
+ )
+
+ with open( "src/style.css" ) as css:
+     st.markdown( f'<style>{css.read()}</style>' , unsafe_allow_html= True)
 
  st.html(HERO_TEXT)
 
  st.markdown(INTRO_TEXT)
 
- tab_calculator, tab_expert, tab_method, tab_about = st.tabs(['🧮 Calculator', '🤓 Expert Mode', '📖 Methodology', 'ℹ️ About'])
+ tab_calculator, tab_expert, tab_token, tab_method, tab_about = st.tabs(
+     [
+         '🧮 Calculator',
+         '🤓 Expert Mode',
+         '🪙 Tokens estimator',
+         '📖 Methodology',
+         'ℹ️ About'
+     ]
+ )
 
  with tab_calculator:
 
@@ -30,6 +44,10 @@ with tab_calculator:
  with tab_expert:
 
      expert_mode()
+
+ with tab_token:
+
+     token_estimator()
 
  with tab_method:
 
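For readers unfamiliar with the pattern, the CSS injection added in this hunk is the usual Streamlit workaround for custom styling: read a local stylesheet and embed it with `st.markdown(..., unsafe_allow_html=True)`. A minimal standalone sketch of the same idea follows; the file path and page settings mirror app.py, while `load_local_css` is a hypothetical helper name (the commit inlines the same two lines directly).

```python
import streamlit as st

def load_local_css(path: str) -> None:
    """Read a local stylesheet and inject it into the Streamlit page."""
    # Hypothetical helper; app.py performs these two lines inline.
    with open(path) as css:
        st.markdown(f"<style>{css.read()}</style>", unsafe_allow_html=True)

# Mirrors the order used in app.py: configure the page first, then apply the styles.
st.set_page_config(layout="wide", page_title="ECOLOGITS", page_icon="💬")
load_local_css("src/style.css")
```
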
pyproject.toml DELETED
@@ -1,12 +0,0 @@
1
- [project]
2
- name = "ecologits-calculator-streamlit"
3
- version = "0.1.0"
4
- description = "Add your description here"
5
- readme = "README.md"
6
- requires-python = ">=3.12"
7
- dependencies = [
8
- "ecologits>=0.6.1",
9
- "pint>=0.24.4",
10
- "plotly>=6.0.0",
11
- "streamlit>=1.43.1",
12
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,5 @@
  streamlit
  ecologits
  pint
- plotly
+ plotly
+ tiktoken

src/__init__.py CHANGED
@@ -1,6 +1,7 @@
  from .content import *
  from .constants import *
  from .expert import expert_mode
+ from .token_estimator import token_estimator
  from .utils import *
  from .calculator import calculator_mode
  from .impacts import get_impacts, display_impacts

src/__pycache__/__init__.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/__init__.cpython-312.pyc and b/src/__pycache__/__init__.cpython-312.pyc differ
 
src/__pycache__/calculator.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/calculator.cpython-312.pyc and b/src/__pycache__/calculator.cpython-312.pyc differ
 
src/__pycache__/constants.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/constants.cpython-312.pyc and b/src/__pycache__/constants.cpython-312.pyc differ
 
src/__pycache__/content.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/content.cpython-312.pyc and b/src/__pycache__/content.cpython-312.pyc differ
 
src/__pycache__/electricity_mix.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/electricity_mix.cpython-312.pyc and b/src/__pycache__/electricity_mix.cpython-312.pyc differ
 
src/__pycache__/expert.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/expert.cpython-312.pyc and b/src/__pycache__/expert.cpython-312.pyc differ
 
src/__pycache__/impacts.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/impacts.cpython-312.pyc and b/src/__pycache__/impacts.cpython-312.pyc differ
 
src/__pycache__/models.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/models.cpython-312.pyc and b/src/__pycache__/models.cpython-312.pyc differ
 
src/__pycache__/token_estimator.cpython-312.pyc ADDED
Binary file (1.9 kB).
 
src/__pycache__/utils.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/utils.cpython-312.pyc and b/src/__pycache__/utils.cpython-312.pyc differ
 
src/content.py CHANGED
@@ -33,6 +33,34 @@ WARNING_BOTH = """
  ⚠️ The model architecture has not been publicly released and is multimodal, expect lower precision of estimations.
  """
 
+ TOKEN_ESTIMATOR_TEXT = """
+ A token is the basic unit of text that a language model processes. Think of it as a piece of text that the model breaks down for analysis and generation. Tokens can be:
+
+ 1. Whole words
+ 2. Parts of words
+ 3. Punctuation marks
+ 4. Spaces or other whitespace characters
+
+ For example, the sentence "I love EcoLogits!" might be tokenized like this:
+
+ - "I" (1 token)
+ - "love" (1 token)
+ - "EcoLogits" (1 token)
+ - "!" (1 token)
+
+ Different tokenization methods exist: some models split tokens at spaces, while others use more complex algorithms that break words into subwords.
+
+ Tokens are crucial because:
+
+ - They determine the model's input and output capacity
+ - They impact processing speed and memory usage
+ - Most LLMs have a maximum token limit (e.g., 4,000 or 8,000 tokens)
+ - Longer texts require more tokens, which can increase computational complexity
+ - At EcoLogits, they are at the core of our impact assessment methodology!
+
+ When you're writing or interacting with an LLM, being aware of token count can help you manage input length and complexity more effectively.
+ """
+
  ABOUT_TEXT = r"""
  ## 🎯 Our goal
  **The main goal of the EcoLogits Calculator is to raise awareness on the environmental impacts of LLM inference.**

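The tokenization walkthrough in TOKEN_ESTIMATOR_TEXT can be checked directly with tiktoken, the dependency this commit adds to requirements.txt. A small sketch, assuming the cl100k_base encoding used elsewhere in the commit; the actual splits may differ from the illustrative breakdown in the prose.

```python
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
token_ids = encoding.encode("I love EcoLogits!")

# Decode each id back to its text piece to see where this encoding splits the sentence.
pieces = [encoding.decode_single_token_bytes(t).decode("utf-8") for t in token_ids]
print(len(token_ids), pieces)
```
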
src/style.css ADDED
@@ -0,0 +1,23 @@
+ @import url('https://fonts.googleapis.com/css2?family=Montserrat:ital,wght@0,100..900;1,100..900&display=swap');
+
+ html, body, [class*="css"] {
+     font-family: 'Montserrat', sans-serif;
+     font-size: 18px;
+     font-weight: 500;
+     color: #091747;
+ }
+
+ [data-testid="metric-container"] {
+     width: fit-content;
+     margin: auto;
+ }
+
+ [data-testid="metric-container"] > div {
+     width: fit-content;
+     margin: auto;
+ }
+
+ [data-testid="metric-container"] label {
+     width: fit-content;
+     margin: auto;
+ }

src/token_estimator.py ADDED
@@ -0,0 +1,30 @@
+ import streamlit as st
+ import tiktoken
+ from .content import TOKEN_ESTIMATOR_TEXT
+
+ def num_tokens_from_string(string: str, encoding_name: str) -> int:
+     """Returns the number of tokens in a text string."""
+     encoding = tiktoken.get_encoding(encoding_name)
+     num_tokens = len(encoding.encode(string))
+     return num_tokens
+
+ def token_estimator():
+
+     st.markdown("### 🪙 Tokens estimator")
+
+     st.markdown("As our methodology deeply relies on the number of tokens processed by the model *(and as no-one is token-fluent)*, we provide you with a tool to estimate the number of tokens in a given text.")
+
+     st.expander("ℹ️ What is a token anyway ?", expanded = False).markdown(TOKEN_ESTIMATOR_TEXT)
+
+     user_text_input = st.text_area("Type or paste some text to estimate the amount of tokens.", "EcoLogits is a great project!")
+
+     _, col2, _ = st.columns([2, 1, 2])
+
+     with col2:
+
+         st.metric(
+             label = 'tokens estimated amount',
+             #label_visibility = 'hidden',
+             value = num_tokens_from_string(user_text_input, "cl100k_base"),
+             border = True
+         )

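The num_tokens_from_string helper added above is plain Python and can be exercised outside Streamlit. A quick sanity check, assuming tiktoken is installed and using the same cl100k_base encoding that token_estimator() passes to st.metric; the printed count depends on the encoding.

```python
from src.token_estimator import num_tokens_from_string

# Same default text as the st.text_area widget in the Tokens estimator tab.
print(num_tokens_from_string("EcoLogits is a great project!", "cl100k_base"))
```
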
uv.lock DELETED
The diff for this file is too large to render.