Upload 9 files

Browse files

Files changed (9) hide show

.gitattributes +1 -34
README.md +119 -3
config.json +44 -0
generation_config.json +7 -0
model.safetensors +3 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +44 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,2 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text


























1	*.safetensors filter=lfs diff=lfs merge=lfs -text
2	+ tokenizer.model filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,119 @@
----
-license: apache-2.0
----

+---
+license: apache-2.0
+datasets:
+- josedamico/sugarcane
+language:
+- en
+base_model:
+- TinyLlama/TinyLlama-1.1B-Chat-v1.0
+tags:
+- sugarcane
+---
+# 🌱 TinyLLaMA-Sugarcane
+Welcome to the *first open-source LLM fine-tuned for sugarcane production*! 🧠🌾
+This model is a fine-tuned version of [`TinyLLaMA`](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0), trained specifically on sugarcane-focused data. Developed by [SciCrop](https://scicrop.com) as part of its commitment to open innovation in agriculture, this is one of the first domain-specific small language models (SLMs) created for the agribusiness sector.
+
+---
+## 🚜 Why Sugarcane?
+Sugarcane is one of the most important crops in Brazil and globally — but most LLMs know very little about its specific production cycle, challenges, and terminology.
+By fine-tuning TinyLLaMA on 2,000+ question/answer pairs from real-world sugarcane use cases, we aim to deliver:
+- ✅ Better accuracy
+- ✅ Clearer answers
+- ✅ Local deployment capabilities for agricultural experts, cooperatives, and researchers
+---
+## 🔍 Model Details
+- **Base model**: `TinyLlama/TinyLlama-1.1B-Chat-v1.0`
+- **Fine-tuned on**: Domain-specific QA pairs related to sugarcane
+- **Architecture**: Causal LM (`LlamaForCausalLM`); fine-tuned with LoRA + QLoRA
+- **Tokenizer**: `LlamaTokenizer`
+- **Model size**: ~1.1B parameters
+- **Format**: Available in both HF standard and `GGUF` for local/Ollama use
+---
+## 🧪 Try it locally with Ollama
+We believe local models are the future for privacy-sensitive, domain-specific AI.
+You can run this model locally using [Ollama](https://ollama.com):
+```bash
+ollama run infinitestack/tinyllama-sugarcane
+```
+👉 Or explore the model directly:
+https://ollama.com/infinitestack/tinyllama-sugarcane
+
+---
+## 🌐 About InfiniteStack
+This model is part of **InfiniteStack**, a platform by [SciCrop](https://scicrop.com) that helps companies in the agri-food-energy-environment chain create, train, and deploy their own AI and analytics solutions — securely and at scale.
+### 📦 InfiniteStack offers:
+- A containerized platform that runs on-prem or in private cloud
+- Full support for **SLMs and LLMs** using your **real and private data**
+- No/Low-code interfaces to *Collect*, *Automate*, *Leverage*, *Catalog*, *Observe*, and *Track* data pipelines and AI assets
+
+🌐 Learn more: https://infinitestack.ai
+
+---
+## 🧠 Why Small Language Models (SLMs)?
+SLMs are great when:
+- You need local inference (offline, on-device, or private)
+- Your domain is narrow and specific
+- You want full control over fine-tuning and usage
+- You care about speed, size, and cost-efficiency
+
+Big isn’t always better. Sometimes, smart and focused beats giant and generic. 💡
+
+---
+## 🤝 Community & Open Innovation
+This work reflects SciCrop’s ongoing commitment to the open-source ecosystem, and to creating useful, usable AI for real-world agribusiness.
+Feel free to fork, contribute, fine-tune further, or use it in your own ag project.
+We’d love to hear how you're using it!
+
+---
+## 📂 Files included
+This repo includes:
+- `config.json`
+- `tokenizer.model`
+- `tokenizer.json`
+- `model.safetensors`
+- `special_tokens_map.json`
+- `generation_config.json`
+- `tokenizer_config.json`
+- `README.md`
+
+A merged and converted `.gguf` version is also available at **Ollama Hub**.
+
+---
+## 📬 Questions or Contributions?
+Ping us at:
+📧 [email protected]
+🌐 https://scicrop.com
+🌱 https://infinitestack.ai
+Made with ☕, 🌾 and ❤️ in Brazil
+by @josedamico and the InfiniteStack team

config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 5632,
+  "max_position_embeddings": 2048,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 22,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "float32",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "fp4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "vocab_size": 32000
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "max_length": 2048,
+  "pad_token_id": 0,
+  "transformers_version": "4.50.0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0994e22bd729a4bd6958fbe99d269af861d6683fab1d64bd9bc5400d99fc552
+size 134

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 2048,
+  "pad_token": "</s>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}