Sigrid De los Santos committed on
Commit 9df4cc0 · 1 Parent(s): 342fd5f

Remove remaining binary file for Hugging Face

This view is limited to 50 files because it contains too many changes.

Files changed (50)
  1. ai_analysis/fin_sentiment.py +17 -0
  2. ai_analysis/fin_signal_tagging.py +15 -0
  3. data/ai_2025-06-03.md +109 -0
  4. data/ai_2025-07-04.md +78 -0
  5. data/ai_2025-07-04_1.md +104 -0
  6. data/combined_report.md +700 -0
  7. data/mining_2025-07-04.md +89 -0
  8. data/nuclear_energy_2025-06-03.md +130 -0
  9. data/nuclear_energy_2025-06-03_1.md +111 -0
  10. data/nuclear_energy_2025-07-02.md +133 -0
  11. data/nuclear_energy_2025-07-04.md +117 -0
  12. external/.DS_Store +0 -0
  13. external/FinGPT/.github/FUNDING.yml +12 -0
  14. external/FinGPT/.github/ISSUE_TEMPLATE/feature_request.md +20 -0
  15. external/FinGPT/.gitignore +141 -0
  16. external/FinGPT/.gitpod.yml +10 -0
  17. external/FinGPT/.idea/.gitignore +3 -0
  18. external/FinGPT/CODE_OF_CONDUCT.md +65 -0
  19. external/FinGPT/CONTRIBUTING.md +68 -0
  20. external/FinGPT/FinGPT_ Training with LoRA and Meta-Llama-3-8B.ipynb +0 -0
  21. external/FinGPT/FinGPT_Inference_Llama2_13B_falcon_7B_for_Beginners.ipynb +0 -0
  22. external/FinGPT/FinGPT_Training_LoRA_with_ChatGLM2_6B_for_Beginners_v2-2.ipynb +0 -0
  23. external/FinGPT/LICENSE +21 -0
  24. external/FinGPT/MANIFEST.in +1 -0
  25. external/FinGPT/README.md +384 -0
  26. external/FinGPT/fingpt/FinGPT_Benchmark/__init__.py +2 -0
  27. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/__init__.py +3 -0
  28. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/benchmarks.py +114 -0
  29. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/convfinqa.py +75 -0
  30. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/evaluate.sh +395 -0
  31. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/fineval.py +72 -0
  32. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/finred.py +150 -0
  33. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/fiqa.py +176 -0
  34. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/fpb.py +168 -0
  35. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/headline.py +84 -0
  36. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/ner.py +94 -0
  37. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/nwgi.py +86 -0
  38. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/sentiment_templates.txt +5 -0
  39. external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/tfns.py +82 -0
  40. external/FinGPT/fingpt/FinGPT_Benchmark/config.json +33 -0
  41. external/FinGPT/fingpt/FinGPT_Benchmark/config_hf.json +11 -0
  42. external/FinGPT/fingpt/FinGPT_Benchmark/config_new.json +35 -0
  43. external/FinGPT/fingpt/FinGPT_Benchmark/data/__init__.py +0 -0
  44. external/FinGPT/fingpt/FinGPT_Benchmark/data/download.py +41 -0
  45. external/FinGPT/fingpt/FinGPT_Benchmark/data/prepare_data.ipynb +0 -0
  46. external/FinGPT/fingpt/FinGPT_Benchmark/demo.ipynb +715 -0
  47. external/FinGPT/fingpt/FinGPT_Benchmark/readme.md +169 -0
  48. external/FinGPT/fingpt/FinGPT_Benchmark/train.sh +547 -0
  49. external/FinGPT/fingpt/FinGPT_Benchmark/train_lora.py +198 -0
  50. external/FinGPT/fingpt/FinGPT_Benchmark/utils.py +216 -0
ai_analysis/fin_sentiment.py ADDED
@@ -0,0 +1,17 @@
+ # Uses a Hugging Face pipeline to classify text sentiment (positive, neutral, negative) with FinBERT.
+
+ from transformers import pipeline
+
+ # Load the FinBERT financial sentiment pipeline once at module import.
+ # With top_k=None the pipeline returns scores for all labels, sorted by confidence.
+ sentiment_pipeline = pipeline(
+     "sentiment-analysis",
+     model="ProsusAI/finbert",
+     top_k=None
+ )
+
+ def analyze_sentiment(text):
+     try:
+         # Crude character-based cut to keep the input within the 512-token limit.
+         result = sentiment_pipeline(text[:512])[0]  # top-scoring label
+         return result["label"].lower(), round(result["score"], 3)
+     except Exception:
+         return "error", 0.0
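A minimal usage sketch (the headline is made up, and the import path assumes the repo's `ai_analysis` package is importable; the first call downloads the model):

```python
from ai_analysis.fin_sentiment import analyze_sentiment

label, score = analyze_sentiment("Acme Corp beats Q2 revenue estimates and raises guidance")
print(label, score)  # e.g. "positive" 0.94; the exact score depends on the model
```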
ai_analysis/fin_signal_tagging.py ADDED
@@ -0,0 +1,15 @@
+ import re
+
+ # Keywords that signal funding, M&A, or financial-performance events.
+ FINANCIAL_KEYWORDS = [
+     "IPO", "Series A", "Series B", "funding", "acquisition", "merger",
+     "partnership", "earnings", "revenue", "valuation", "investment",
+     "raise", "round", "debt", "exit", "seed", "growth", "MoM", "ARR", "burn rate"
+ ]
+
+ def extract_signals(text):
+     # Match whole words/phrases, case-insensitively; lowercase the text once.
+     lowered = text.lower()
+     found = []
+     for kw in FINANCIAL_KEYWORDS:
+         pattern = r"\b" + re.escape(kw.lower()) + r"\b"
+         if re.search(pattern, lowered):
+             found.append(kw)
+     return list(set(found))
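An illustrative call (hypothetical sentence) showing that matching is whole-word and case-insensitive, so "fund" does not trigger the "funding" keyword:

```python
from ai_analysis.fin_signal_tagging import extract_signals

signals = extract_signals("The startup closed a seed round to fund its IPO ambitions.")
print(sorted(signals))  # ['IPO', 'round', 'seed']
```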
data/ai_2025-06-03.md ADDED
@@ -0,0 +1,109 @@
+
+ > **Metrics**
+ > Topic: `AI`
+ > Articles Collected: `371`
+ > Generated: `2025-06-03 11:57`
+ >
+ # AI Value Investing Memo – Week Ending 2 June 2025 (Weekly Focus)
+
+ ## **Intro & Market Context**
+
+ This week in AI, the market accelerated along its high-anticipation trajectory, with a cluster of activity in startup fundraising, M&A, and fresh enterprise adoption. While no single "breakthrough" event dominated headlines, several key themes emerged: (1) venture capital continues to quietly roll up smaller firms into AI-centric portfolios, (2) corporate M&A is ramping up in the AI space, (3) established tech giants are focusing on massive compute expansion as agentic AI demand surges, (4) novel applications (psychiatry, fintech, legaltech, and biology) are moving into commercial and even IPO-ready scale, and (5) regulatory and privacy debate continues to follow AI's march into sensitive sectors.
+
+ General market sentiment remains optimistic but increasingly bifurcated: public equities in megacap AI (NVDA, MSFT, GOOG) are expensive, while an undercurrent of deep value persists among small caps and M&A targets. Smart money is increasingly shifting attention to niche, high-moat AI firms not yet in Wall Street's spotlight, particularly those with strong cash flows or unique IP.
+
+ ---
+
+ ## **1. Key Value Signals**
+
+ - **Startup Fundraising Surge:** Early- and mid-stage AI startups (Rillet, Snabbit, Inven, Valla, Symbl.ai) raised significant capital despite macro volatility ([TechCrunch](https://techcrunch.com/2025/05/30/startups-weekly-amd-acquisition-and-other-moves-to-scale-ai-startups/), [Tech Funding News](https://techfundingnews.com/next-gen-ai-pitchbook-rival-finnish-inven-grabs-12-75-for-its-first-ai-native-deal-sourcing-platform/), [TechCrunch](https://techcrunch.com/2025/06/02/valla-raises-2-7m-to-make-legal-recourse-more-accessible-to-employees/)).
+ - **Venture Roll-Ups:** Khosla Ventures and Elad Gil are investing in AI-powered rollups of mature, cash-flow-positive companies — a signal that expertise and customer lists are the next moat ([TechCrunch](https://techcrunch.com/2025/05/23/khosla-ventures-among-vcs-experimenting-with-ai-infused-roll-ups-of-mature-companies/), [TechCrunch](https://techcrunch.com/2025/06/01/early-ai-investor-elad-gil-finds-his-next-big-bet-ai-powered-rollups/)).
+ - **Compute Demand Surge:** Fintech, health, and banking are adopting agentic AI, creating enormous compute needs (100x growth potential) and favoring scale datacenter and semiconductor players ([FinTech Futures](https://www.fintechfutures.com/ai-in-fintech/unlock-fintech-innovation-with-agentic-ai-ai-factories-and-ai-powered-fraud-detection-workflows)).
+ - **M&A: Strategic AI Acquisitions:** Leidos (LDOS) acquires AI/cyber firm Kudu Dynamics. Invoca acquires Symbl.ai — precedent for AI-focused M&A across sectors ([Axios](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)).
+ - **AI in Regulated Sectors:** Major inroads in banking (fraud, loan origination), health (psychiatry, biology), and legaltech (Valla, legal recourse for employees) ([Nature](https://www.nature.com/articles/s41380-025-03072-3), [Rude Baguette](https://www.rudebaguette.com/en/2025/06/ai-finally-did-it-breakthrough-in-biology-solves-a-mystery-scientists-have-been-chasing-for-over-30-years/)).
+ - **Data Privacy & Regulation:** Growing calls for comprehensive regulation — creates compliance and consulting tailwinds for niche AI/data security players ([Dark Reading](https://www.darkreading.com/cyber-risk/rethinking-data-privacy-age-generative-ai)).
+
+ ---
+
+ ## **2. Stocks or Startups to Watch**
+
+ ### **Public Companies**
+ - **Leidos Holdings (NYSE: LDOS)**
+   - Trigger: Acquired AI-focused cyber firm Kudu Dynamics for $300M cash ([Axios](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)).
+   - Stats: P/E ~16, ROE ~16%, Market Cap ~$17.6B (as of May 2025); stable defense/cyber/AI mix, decent value for its sector.
+   - Watch for: Expanded AI defense/cyber offering, M&A synergy upside.
+
+ - **Invoca** (private, potential IPO/M&A target)
+   - Trigger: Acquired Symbl.ai (AI-powered customer experience, $23M funding) — raises its profile as a revenue automation leader.
+
+ ### **Notable Startups & VC-Backed Companies**
+ - **Rillet**
+   - Trigger: Raised $25M Series A (Sequoia, <1 yr post-seed). Focus: AI for finance/accounting automation ([TechCrunch](https://techcrunch.com/2025/05/30/startups-weekly-amd-acquisition-and-other-moves-to-scale-ai-startups/)).
+   - Value Note: Early institutional traction and rapid fundraising in a nascent AI-for-services vertical.
+
+ - **Valla**
+   - Trigger: $2.7M seed to democratize legal recourse using GenAI; focus on employee rights ([TechCrunch](https://techcrunch.com/2025/06/02/valla-raises-2-7m-to-make-legal-recourse-more-accessible-to-employees/)).
+   - Value Note: High regulatory moat, early traction, strong founder narrative.
+
+ - **Inven**
+   - Trigger: $12.75M for AI-native deal sourcing (potential to disrupt PitchBook and legacy PE data vendors) ([Tech Funding News](https://techfundingnews.com/next-gen-ai-pitchbook-rival-finnish-inven-grabs-12-75-for-its-first-ai-native-deal-sourcing-platform/)).
+   - Value Note: Unique vertical for AI, early validation.
+
+ - **Symbl.ai** (acquired by Invoca)
+   - Trigger: AI-powered conversation intelligence; validates the VC-funded exit path for vertical AI.
+
+ - **Agentic AI, Data Privacy, and Fraud Detection Startups**
+   - Trigger: Fintech demand for agentic AI, "AI factories", and fraud detection = greenfield for private AI infra startups ([FinTech Futures](https://www.fintechfutures.com/ai-in-fintech/unlock-fintech-innovation-with-agentic-ai-ai-factories-and-ai-powered-fraud-detection-workflows)).
+
+ ---
+
+ ## **3. What Smart Money Might Be Acting On**
+
+ - **AI Rollup Trend:** Major VCs (Khosla, Elad Gil) are moving beyond backing pure-play startups to quietly acquiring and aggregating legacy companies, layering AI products on top ([TechCrunch](https://techcrunch.com/2025/05/23/khosla-ventures-among-vcs-experimenting-with-ai-infused-roll-ups-of-mature-companies/), [TechCrunch](https://techcrunch.com/2025/06/01/early-ai-investor-elad-gil-finds-his-next-big-bet-ai-powered-rollups/)).
+   - **Why:** Lower risk than bleeding-edge AI bets, immediate cash flow, and quick access to hard-to-get enterprise customers.
+
+ - **Enterprise AI B2B:** Bet on startups with regulatory/vertical moats (finance, healthcare, legal) rather than direct consumer GenAI, where hype and competition are fierce.
+
+ - **AI-Driven M&A:** Incumbents in security, defense, and SaaS (like Leidos, Invoca) are primed to bolt on AI capabilities quickly — making small-cap public firms with unique IP potential targets.
+
+ - **Compute Infrastructure:** Buy or build into companies with data center or AI-chip exposure, or proprietary algorithms serving banks, fintechs, or life sciences.
+
+ - **Compliance/Privacy:** Funds may flow to specialist consultancies and SaaS with a privacy/compliance focus as the regulatory overhang tightens.
+
+ ---
+
+ ## **4. References**
+
+ - [AI in Psychiatry](https://www.nature.com/articles/s41380-025-03072-3) | [AI for Biology](https://www.rudebaguette.com/en/2025/06/ai-finally-did-it-breakthrough-in-biology-solves-a-mystery-scientists-have-been-chasing-for-over-30-years/)
+ - [Fintech/Agentic AI Demand](https://www.fintechfutures.com/ai-in-fintech/unlock-fintech-innovation-with-agentic-ai-ai-factories-and-ai-powered-fraud-detection-workflows)
+ - [Leidos, Invoca M&A](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)
+ - [Khosla/Elad Gil Rollups](https://techcrunch.com/2025/05/23/khosla-ventures-among-vcs-experimenting-with-ai-infused-roll-ups-of-mature-companies/), [TechCrunch on Rollups](https://techcrunch.com/2025/06/01/early-ai-investor-elad-gil-finds-his-next-big-bet-ai-powered-rollups/)
+ - [Rillet/Startup Rounds](https://techcrunch.com/2025/05/30/startups-weekly-amd-acquisition-and-other-moves-to-scale-ai-startups/), [Inven Raise](https://techfundingnews.com/next-gen-ai-pitchbook-rival-finnish-inven-grabs-12-75-for-its-first-ai-native-deal-sourcing-platform/), [Valla Seed](https://techcrunch.com/2025/06/02/valla-raises-2-7m-to-make-legal-recourse-more-accessible-to-employees/)
+ - [Data Privacy/Regulation](https://www.darkreading.com/cyber-risk/rethinking-data-privacy-age-generative-ai)
+ - [AI Macro Debate](https://www.forbes.com/sites/bernardmarr/2025/05/23/ai-could-reshape-humanity-and-we-have-no-plan-for-it/)
+
+ ---
+
+ ## **5. Investment Hypothesis**
+
+ - **The Cream Rises:** Amid the AI hype, value is accruing fastest to (a) established firms acquiring AI/niche tech, (b) small/midcap vertical SaaS/AI companies with regulatory moats, and (c) strategic AI-powered rollups with serious institutional expertise and cash flow.
+ - **Key Thesis:** Ignore the frothy megacap multiples; focus on under-followed AI stocks and private companies with:
+   - Proven B2B or SaaS revenue,
+   - Unique IP and defensible verticals,
+   - Cash flow or recent M&A/VC validation,
+   - Potential as rollup or acquisition targets.
+ - **Tailwind:** Surging demand in regulated and semi-regulated verticals (health, finance, defense, legal).
+ - **Headwind:** Regulatory/ethical scrutiny could increase the cost of doing business for generalist GenAI players — favoring those with purpose-built compliance tools or vertical knowledge.
+
+ **Bottom Line:**
+ - Watch for M&A in defense/cybersecurity, SaaS, and AI-powered B2B plays (Leidos, Invoca, Rillet).
+ - Track VC-backed AI rollups as stealth vehicles for value creation and future IPO/M&A pops.
+ - Seek out early-stage startups in vertical SaaS or legaltech deploying AI in compliance-intensive settings.
+ - Physical compute/infra players serving agentic AI (AMD, data centers) continue to benefit from secular demand.
+
+ ---
+
+ **Broad Summary of This Week's AI News:**
+ The week was dominated by continued VC confidence, strategic M&A, and institutional moves in vertical AI applications, with significant attention on small-cap and startup valuations. The macroeconomic backdrop remains strong for AI demand, but value opportunities lie beneath the surface in rollups, newly funded vertical SaaS, and compliance-driven niches. Regulatory risk is rising but is also carving out new investable moats.
+
+ ---
data/ai_2025-07-04.md ADDED
@@ -0,0 +1,78 @@
+
+ > Topic: `AI`
+ > Articles Collected: `163`
+ > Generated: `2025-07-04 13:40`
+ >
+ # Value Investor AI Weekly Memo
+ **Week of June 30 – July 6, 2025**
+
+ ---
+
+ ## Market Sentiment & Trends
+
+ This week, the AI market continues to be characterized by robust *growth optimism* and *massive capital deployment*. Sentiment remains largely positive in infrastructure and applied AI, but there is rising skepticism toward sky-high private market valuations at some fast-following startups. Major headlines focus on AI’s influence in the cybersecurity, legal, HR, and consulting verticals, as well as the continuing "picks and shovels" theme in datacenter hardware and services.
+ *No major regulatory shocks noted*, but institutions and investors are expressing caution about the sustainability of AI startup valuations and possible hype cycles.
+
+ ---
+
+ ## 1. Key Value Signals
+
+ - **Infrastructure Focus Remains Dominant:** The highest conviction for value investing is in AI infrastructure—hardware, datacenters, and core networking.
+ - **M&A and Partnership Activity:** Notable signals include Apple considering partnerships/acquisitions for Siri enhancements (Anthropic, OpenAI) and SoftBank moving aggressively on artificial superintelligence with multi-phase global projects.
+ - **Startup Capital Flows Accelerating:** Noteworthy rounds at Harvey ($300M Series E, legal AI), Abridge ($300M Series E, medical AI), Metaview ($35M, hiring), and Lovable ($150M rumored). However, most are at steep valuations (>$2B pre/post-money).
+ - **Insider & Smart Money Activity:** a16z, Kleiner Perkins, Coatue, Google Ventures, and Sequoia are active, with Glasswing Ventures discussing new AI funding strategies.
+ - **Geographic Expansion:** SoftBank’s ASI moves and an Asia-centric “build with context” approach highlight a more sustainable, potentially undervalued new-entrant pipeline.
+
+ ---
+
+ ## 2. Stocks or Startups to Watch
+
+ ### **Public Markets:**
+ - **Arista Networks (ANET)**, **Nvidia (NVDA)**, **AMD (AMD):** “Picks and shovels” for the AI gold rush—datacenter, networking, compute chips. *Arista* trades at a lower valuation multiple than Nvidia, still has strong ROE, and is less crowded.
+ - **SoftBank (SFTBY/SFTBF):** The push for "artificial superintelligence" signals heavy capital spend, but it could be an undervalued play if execution improves and Vision Fund losses subside.
+ - **Apple (AAPL):** Movement on AI partnerships/acquisitions may re-rate Siri’s potential, although Apple trades rich by value standards.
+
+ ### **Private/Startup Watchlist:**
+ - **Harvey (Legal AI):** $5B valuation, but massive adoption potential for legal transformation; recent consecutive mega-rounds—possibly ahead of fundamentals.
+ - **Abridge (Healthcare AI):** $5.3B valuation; automating medical notes is a real use case, but the valuation is steep.
+ - **Metaview (Recruitment AI):** Google Ventures led; automating and bias-reducing hiring—smaller, earlier, potentially higher reward.
+ - **Lovable:** On track to raise $150M at a $2B valuation. Early-stage AI firm with unknown fundamentals, but worth tracking as a potential future public market debut.
+
+ ### **Infrastructure enablers:**
+ - **Scott Data (Private):** Midwest US data center supporting AI startups—potential for M&A or IPO as a picks-and-shovels play on the AI startup wave.
+ - **Industrial/Manufacturing AI:** Watch industrial AI “digital twins” and multimodal analytics for less flashy, but real, B2B moats.
+
+ ---
+
+ ## 3. What Smart Money Might Be Acting On
+
+ - **Private Market Rotation:** Top VCs (Kleiner Perkins, a16z, Coatue, Sequoia, Google Ventures) are doubling down on AI startups, but selectively—pivoting more to infrastructure, HR, and healthcare use cases where actual adoption is measurable.
+ - **Datacenter & Networking Expansion:** Institutional and growth investors are pushing into datacenter, network, and hardware plays over frothy model-chatbot proliferators.
+ - **“Asia Build” Angle:** Long-term capital is weighing Asian AI execution models, where blitzscaling is shunned in favor of capital efficiency. Early institutional allocation might offer a less overpriced entry into the next breakout AI winners.
+
+ ---
+
+ ## 4. References
+
+ - [Forbes: AI Hype Cycle & Infrastructure](https://www.forbes.com/sites/rscottraynovich/2025/07/01/inside-the-ai-hype-cycle-whats-next-for-enterprise-ai/)
+ - [RCR Wireless: SoftBank's Superintelligence Ambitions](https://www.rcrwireless.com/20250630/ai-infrastructure/softbank-artificial)
+ - [TechCrunch: Harvey, Abridge funding](https://techcrunch.com/2025/06/27/startups-weekly-tech-and-the-law/)
+ - [Startup Ecosystem Canada: Lovable AI funding](https://www.startupecosystem.ca/news/lovable-ai-startup-on-track-to-raise-150m-at-2b-valuation/)
+ - [GovTech: Scott Data, Omaha AI infrastructure partnership](https://www.govtech.com/artificial-intelligence/partnership-looks-to-drive-ai-adoption-in-omaha-neb)
+ - [Mining Technology: Industrial/Multimodal AI](https://www.mining-technology.com/sponsored/whats-next-for-industrial-ai-five-key-developments-shaping-the-space/)
+ - [Business Insider: Claude/Anthropic, Microsoft AI as a core workflow](https://www.businessinsider.com/claude-ran-store-anthropic-ai-agent-lessons-learned-middle-managers-2025-6)
+
+ ---
+
+ ## 5. Investment Hypothesis
+
+ **The market is in the mid-to-late innings of the first generative AI value cycle. Near-term value is likely to accrue to AI infrastructure enablers (datacenter, networking, compute), not to richly priced, flashy model startups. The next unlock is in vertical B2B—manufacturing, healthcare, legal, hiring—especially where data or workflows are defensible (moats). Early-stage infrastructure providers outside the Bay Area (e.g., Midwest data centers, lower-multiple Asia AI shops) may offer underappreciated value. SoftBank’s renewed push and Apple’s partnership strategy suggest major future M&A, benefiting core AI tech and infrastructure players.**
+
+ ### **Screen for:**
+ - Public tech with strong fundamentals (low P/E, high ROE, cash flows) in critical infrastructure (Arista, AMD)
+ - Private companies with repeat-use, high-barrier products — notably in B2B SaaS, industrial, or privacy-compliant hiring/medtech AI
+ - Undercovered, smaller infrastructure shops and regional datacenter players (public or potential IPO/M&A targets)
+
+ *(An illustrative version of this screen in code follows this memo.)*
+
+ ---
+
+ **(Caveat: Recent startup valuations may be unsustainably high. Exercise discipline; seek evidence of unit economics and actual cashflow, not just growth metrics.)**
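To make the “Screen for” criteria concrete, here is a minimal illustrative sketch in pandas. The tickers, thresholds, and figures are placeholders for demonstration, not real market data or a recommendation:

```python
import pandas as pd

# Placeholder fundamentals, for illustration only (not real market data).
universe = pd.DataFrame([
    {"ticker": "INFRA1", "pe": 22.0, "roe": 0.28, "fcf_musd": 1500.0},
    {"ticker": "INFRA2", "pe": 45.0, "roe": 0.31, "fcf_musd": 900.0},
    {"ticker": "SAAS1",  "pe": 80.0, "roe": 0.05, "fcf_musd": -120.0},
])

# The memo's screen: low P/E, high ROE, positive free cash flow.
screen = universe[(universe["pe"] < 30) & (universe["roe"] > 0.15) & (universe["fcf_musd"] > 0)]
print(screen["ticker"].tolist())  # ['INFRA1']
```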
data/ai_2025-07-04_1.md ADDED
@@ -0,0 +1,104 @@
+
+ > Topic: `AI`
+ > Articles Collected: `116`
+ > Generated: `2025-07-04 14:12`
+ >
+ # AI Weekly Value Investor Memo
+ ### Week of July 1st, 2025
+
+ ---
+
+ ## 0. Market Context & Sentiment
+
+ **Macro & Sentiment:**
+ This week, investor sentiment in the AI sector remains broadly bullish, underpinned by continued enterprise adoption and “picks and shovels” investment themes. Headlines focus on the scaling of AI toward Artificial General Intelligence (AGI) and hype cycles driven by big checks for vertical SaaS startups and infrastructure vendors. No single transformative event, but sustained high valuations, strong VC focus on “AI-native” startups, and large checks pouring into cybersecurity and legal GTM plays. Amid frothy valuations, institutional players (SoftBank, Big Tech) are doubling down on infrastructure and ecosystem strategies over direct “brain” competition.
+
+ Key macro trends:
+ - Rising data center demand, with local partnerships (Omaha) echoing US reshoring and digitization.
+ - Regulatory posture remains ambiguous—no new moats from regulation, but AI safety and compliance themes are prominent.
+ - Cyclical hype for AGI but little near-term fundamental change.
+ - The venture market bifurcates: mature infra stocks pop while newer AI SaaS valuations are punchy, not value-oriented.
+
+ ---
+
+ ## 1. Key Value Signals (This Week)
+
+ - **Infrastructure Over Apps:** Forbes, TechCrunch, and several VC sources stress that infrastructure (“picks & shovels”) wins: Nvidia, AMD, Arista Networks. Underlying: new data center buildouts are where margins and moats are consolidating.
+ - **Big Funding, High Valuation Startups:** Multiple $300M Series E rounds at $5B valuations (Harvey AI, Abridge); notable, but they reflect more exit-hunting than deep value.
+ - **SoftBank AI Ambitions:** SoftBank signals multi-phase, multi-location investments in ASI (Artificial Super Intelligence) infrastructure, not “hot” consumer apps.
+ - **Apple Chasing Leading Models:** Rumors of Apple exploring partnerships with OpenAI and Anthropic for Siri enhancement.
+ - **Cybersecurity Demand:** AI investment is driving new security solutions; the sector is heating up.
+
+ ---
+
+ ## 2. Stocks or Startups to Watch
+
+ ### Infrastructure
+
+ - **Arista Networks (ANET):** Under the radar, strong FCF, high ROE, reasonable P/E (~30), and riding the AI data center build-out.
+ - **Super Micro Computer (SMCI):** Hardware picks & shovels with high revenue growth; P/E and P/B are both lower than high-flying AI SaaS names, and FCF positive.
+ - **Vertiv Holdings (VRT):** Power/cooling for AI data centers, high operating margins, FCF, tailwinds from the capex cycle.
+
+ ### AI-Driven Cybersecurity
+
+ - **SentinelOne (S):** Recently turned cash flow positive, but the price is high—watch for dips.
+ - **CrowdStrike (CRWD):** Pricey, but the moat is growing and sectoral tailwinds persist.
+ - **Small Caps:** Flashpoint and other startups surfaced in cybersecurity coverage—potential acquisition targets.
+
+ ### Newer, Less-Observed Startups
+
+ - **Lovable:** On track to raise $150M at $2B—early, but could be a medium-term play if growth justifies the valuation and multiples contract.
+ - **Gruve (tech consulting AI):** High-growth vertical AI consulting. Private, but watch for an eventual IPO or acquisition.
+ - **Metaview:** Google Ventures-backed AI for interview/recruitment; the market is early but growing.
+
+ ### Legal & Medical AI
+
+ - **Harvey AI, Abridge:** Both received massive late-stage rounds at high valuation-to-revenue ratios. Watch for longer-term public market readiness or a cooling-off.
+
+ ---
+
+ ## 3. What Smart Money Might Be Acting On
+
+ - **Follow the Infra CapEx:** Hedge funds and institutions are likely overweighting core infrastructure—semis, networking, power, storage—where margin, volume, and defensible moats (via high switching costs or regulatory inertia) exist.
+ - **Avoiding Overhyped SaaS/Vertical Plays for Now:** Recent late-stage venture rounds suggest IPO ambitions but present frothy valuations—a signal to wait for broader tech multiple contraction.
+ - **Vulture Mode on Early AI Cybersecurity:** Acquisitive public companies may shop among underfunded cybersecurity startups as hype cools.
+ - **Monitoring Apple’s Moves:** Any deal with OpenAI/Anthropic for Siri would shift market sentiment and spark M&A or partnership runs in vertical AI.
+ - **Asian Market Infrastructure Approaches:** With less “blitzscaling” and more focus on fundamentals (SoftBank, the Asia model), look for undercovered APAC infra names with value metrics.
+
+ ---
+
+ ## 4. References
+
+ News articles cited for context:
+ - [Inside The AI Hype Cycle: What’s Next For Enterprise AI? - Forbes](https://www.forbes.com/sites/rscottraynovich/2025/07/01/inside-the-ai-hype-cycle-whats-next-for-enterprise-ai/)
+ - [SoftBank aims to lead artificial super intelligence era - RCR Wireless News](https://www.rcrwireless.com/20250630/ai-infrastructure/softbank-artificial)
+ - [Apple Explores Anthropic and OpenAI for Siri AI Enhancement - Startup Ecosystem Canada](https://www.startupecosystem.ca/news/apple-explores-anthropic-and-openai-for-siri-ai-enhancement/)
+ - [Surging Investments in AI Are Transforming Cybersecurity - Forbes](https://www.forbes.com/sites/chuckbrooks/2025/06/27/surging-investments-in-ai-are-transforming-cybersecurity/)
+ - [Venture Gapital - Forbes](https://www.forbes.com/sites/richkarlgaard/2025/07/04/venture-gapital/)
+ - [Startups Weekly: Tech and the law - TechCrunch](https://techcrunch.com/2025/06/27/startups-weekly-tech-and-the-law/)
+ - [Lovable AI Startup on Track to Raise $150M at $2B Valuation - Startup Ecosystem Canada](https://www.startupecosystem.ca/news/lovable-ai-startup-on-track-to-raise-150m-at-2b-valuation/)
+
+ ---
+
+ ## 5. Investment Hypothesis
+
+ ### Core Thesis
+ **Value is centered in infrastructure providers—hardware, networking, power, and core security—catering to the real, growing CapEx demand of the AI buildout. Avoid vertical SaaS and “hot” application startups unless multiples meaningfully contract or unique moats emerge.**
+
+ **Rationale:**
+ - AI infrastructure remains the largest, highest-margin, most defensible segment of the ecosystem, with strong fundamentals (low P/E, high ROE, FCF-rich) and clear secular tailwinds.
+ - The application layer (AI-native SaaS) is flush with VC cash, driving up valuations, and carries high “hype vs. actual value” risk unless real-world defensible moats or sticky enterprise contracts are evident.
+ - Smart money is playing infrastructure, not moonshots in AGI or narrow applications—at least until the next wave of valuation resets.
+
+ ### Strategy
+ - **Overweight:** Data center, networking, and power infrastructure (ANET, SMCI, VRT).
+ - **Underweight/monitor:** Late-stage application-layer startups (Harvey, Abridge, Lovable) for valuation resets.
+ - **Watch list:** Early AI cybersec, APAC infrastructure with value metrics, Apple partnership signals.
+ - **Regulatory angle:** Unlikely to drive new barriers to entry/moats this cycle; invest in moats built on high switching costs or network effects.
+
+ ---
+
+ **Conclusion:**
+ This is a “follow the pipes, not the flowers” moment. Let VCs chase $5B AI SaaS rounds—the best value lies in asset-light infrastructure plays with strong balance sheets, high returns on equity, and multi-year secular tailwinds.
+
+ ---
data/combined_report.md ADDED
@@ -0,0 +1,700 @@
+
+ ---
+
+ [AI memo identical to data/ai_2025-06-03.md above]
114
+
115
+ ---
116
+
117
+
118
+ > Topic: `Nuclear energy`
119
+ > Articles Collected: `150`
120
+ > Generated: `2025-07-04 13:55`
121
+ >
122
+ # Nuclear Energy: Value-Investor Weekly Memo
123
+ **Week of June 30 – July 7, 2025**
124
+
125
+ ---
126
+
127
+ ## Executive Summary: Sentiment & Market Trends
128
+
129
+ This week, nuclear energy remains at the center of global and U.S. energy policy debates, buoyed by both political tailwinds (GOP-led support in legislation, state-level deployment pushes) and rising demand from AI/data center infrastructure. Nuclear is also strategically reemerging as the “clean firm” power of choice as renewables face policy setbacks, intermittency challenges, and grid reliability strains. Major tech companies and select startup activity point to accelerations in both fission (SMRs) and fusion, with corporate and government actors signaling capital and operational shifts toward advanced nuclear solutions.
130
+
131
+ Market sentiment appears mildly positive for established names but remains neutral for the broader sector. Early-stage deal flow and new executive moves hint at undervalued opportunities in uranium miners, SMR developers, and next-gen reactor supply chains, all backstopped by robust macro trends.
132
+
133
+ ---
134
+
135
+ ## 1. Key Value Signals
136
+
137
+ - **Public-Private Partnerships & Policy Tailwinds**
138
+ - New York’s governor directs pursuit of at least 1 GW of new nuclear (possible “fleet-style” deployments), signifying state-level commitment.
139
+ - GOP legislation weakens renewables but retains and even enhances support for nuclear/geothermal—improving medium-term earning prospects for nuclear-exposed businesses.
140
+ - **Tech Giant Commitments**
141
+ - Google commits to buying power from Commonwealth Fusion Systems (fusion) and from Kairos Power (SMRs/fission), underscoring long-term belief in and potential floor demand for advanced nuclear power.
142
+ - **M&A / Executive Movement**
143
+ - Ur-Energy (URG) names Matthew Gili (ex-Cameco, Energy Fuels) as President; strong management pedigree in uranium mining suggests focus on operational ramp-up and credibility for growth.
144
+ - **Private Funding & Industrial Partnerships**
145
+ - Westinghouse-ITER $180M fusion contract advances commercial pathways for fusion.
146
+ - Palantir partners with The Nuclear Company for AI deployment in nuclear construction, potentially de-risking timelines and cost overruns—key bottlenecks for new plants.
147
+ - **Uranium Financing**
148
+ - Energy Fuels (NYSE: UUUU) launches $300M ATM share offering for growth and possibly M&A, indicating possible scale-up action or acquisition-driven value.
149
+
150
+ ---
151
+
152
+ ## 2. Stocks or Startups to Watch
153
+
154
+ ### Undervalued Small Caps / Startups
155
+
156
+ - **Ur-Energy (URG)**
157
+ - **Sector**: Uranium production/mining
158
+ - **Signals**: New CEO with pedigree, North American supply play; potential for insider or institutional accumulation.
159
+ - **Fundamentals**: Historically low P/B and P/E vs. sector; improving cash flow as uranium prices trend higher.
160
+ - **Energy Fuels (UUUU)**
161
+ - **Sector**: Uranium/rare earths
162
+ - **Signals**: ATM share offering—could precede an operational expansion, M&A, or balance sheet fortification.
163
+ - **Moat**: Vertical integration and North American production base; tailwinds from potential U.S. uranium supply mandates.
164
+ - **Kairos Power**
165
+ - **Sector**: Small Modular Reactor (SMR) developer
166
+ - **Signals**: Google is a committed off-taker (500 MW); not public but watch for IPO or private rounds.
167
+ - **Moat**: Proprietary reactor and fuel tech, first-mover commercial projects.
168
+ - **Commonwealth Fusion Systems (private)**
169
+ - **Sector**: Fusion
170
+ - **Signals**: Google investing + off-take for 200MW; implies robust institutional backing, possible pre-IPO unicorn.
171
+ - **Moat**: Leading IP/patent portfolio in commercial fusion.
172
+ - **Floating Nuclear Consortia (Europe/Mediterranean)**
173
+ - **Sector**: Maritime nuclear
174
+ - **Signals**: New industry consortium for floating plants; regulatory tailwinds in Europe; riskier but paradigm-shifting.
175
+
176
+ ### Large-Cap Defensive/Incumbent Names
177
+
178
+ - **Westinghouse (private, but watch via Brookfield Asset Management/partners)**
179
+ - **Signals**: $180M fusion contract + global SMR tenders.
180
+ - **Moat**: Deep IP/patents, established utility relationships.
181
+
182
+ #### Emerging Themes
183
+ - SMEs/startups deploying AI to compress reactor construction timelines (e.g., The Nuclear Company + Palantir).
184
+ - Uranium spot market dislocations, supply security, and U.S./Canadian production uptrend.
185
+
186
+ ---
187
+
188
+ ## 3. What Smart Money Might Be Acting On
189
+
190
+ ### Institutional Moves and VC Flows
191
+
192
+ - **Tech Company Off-Take Agreements**: Google’s long-dated power purchase agreements (PPAs) for nuclear fusion and SMRs indicate that large buyers are locking in future clean firm power, giving runway and de-risking revenue for emerging projects.
193
+ - **Leadership Talent Migration**: Appointment of high-profile operators (e.g., Matthew Gili at URG) often precedes capital flows and operational improvement.
194
+ - **Private/VC Investment**: Ongoing private fundraising in fusion (CFS/publicized; others less visible) and SMR space—potential for pre-IPO access or PIPE deals.
195
+ - **Policy-driven Lifts**: Funds with a value/cyclical tilt may be accumulating uranium miners and established SMR suppliers, expecting U.S. or European state-driven demand and pricing power.
196
+
197
+ ---
198
+
199
+ ## 4. References
200
+
201
+ - [Insider Monkey: Ur-Energy appoints Matthew Gili](https://www.insidermonkey.com/blog/ur-energy-urg-names-matthew-gili-as-president-to-support-growth-strategy-1562642/)
202
+ - [TechCrunch: Google’s data center energy use doubles; commits to SMRs & Fusion](https://techcrunch.com/2025/07/01/googles-data-center-energy-use-doubled-in-four-years/)
203
+ - [Newsweek: Google bets on Nuclear Fusion, Commonwealth Fusion Systems](https://www.newsweek.com/google-bets-nuclear-fusion-next-generation-clean-power-2091877)
204
+ - [POWER Magazine: Westinghouse & ITER fusion contract](https://www.powermag.com/westinghouse-iter-sign-180-million-contract-to-advance-nuclear-fusion/)
205
+ - [Utility Dive: NY Gov. Hochul nuclear push](https://www.utilitydive.com/news/new-york-gov-hochul-hints-at-fleet-style-approach-to-nuclear-deployments/751838/)
206
+ - [Insider Monkey: Energy Fuels ATM offering](https://www.insidermonkey.com/blog/energy-fuels-uuuu-launches-300-million-atm-share-offering-program-1562647/)
207
+ - [Marine Link: Industry consortium assesses floating nuclear](https://www.marinelink.com/news/industry-consortium-asses-floating-527616)
208
+ - [The Verge, Sky News, NPR, CleanTechnica] (multiple for macro/policy context)
209
+
210
+ ---
211
+
212
+ ## 5. Investment Hypothesis
213
+
214
+ Amid rising electricity demand from AI/data centers and the political marginalization of wind/solar, nuclear energy—particularly next-gen reactor developers, operationally leveraged uranium miners, and AI-enabled project managers—is set to benefit from both structural and cyclical forces. Near-term policy support, tech company PPA commitments, and tangible operational milestones (fusion contracts, executive talent upgrades) provide a fundamental backdrop for value investors.
215
+
216
+ **Thesis**: Select undervalued uranium miners (URG, UUUU) and actionable SMR/fusion-related plays with real partnerships or contracts (Kairos, CFS, Palantir’s nuclear construction software partners) are likely mispriced relative to long-term demand, the emergence of tech buyer power, and regulatory tailwinds. Watch for balance sheet improvement, insider activity, and capex deployment as future catalysts.
217
+
218
+ **Actionable Watchlist:**
219
+ - Ur-Energy (NYSE: URG) — ride management upgrade and uranium bull cycle
220
+ - Energy Fuels (NYSE: UUUU) — play on U.S. supply autonomy and balance sheet firepower
221
+ - Private: Kairos Power, Commonwealth Fusion Systems — monitor for IPO/news, pre-IPO funds
222
+ - Established supply chain: Westinghouse (via BAM, or tracking SMR contracts), Palantir’s nuclear ventures
223
+
224
+ ---
225
+
226
+ **Macroeconomic/Regulatory Context:**
227
+ - U.S. and European grid reliability and policy now lean “pro-nuclear” as renewables face political and technical hurdles.
228
+ - Tech-sector demand for bespoke clean, reliable baseload may outpace traditional grid growth, driving long-term PPA/contracting up for nuclear-adjacent firms.
229
+ - Early stage risk remains (especially fusion), but government cash, looser environmental reviews, and talent influx are de-risking the sector.
230
+
231
+ ---
232
+
233
+ **Discipline:** Accumulate on dips with a margin of safety; remain alert to policy reversals, cost overruns, and technology risk. Revisit on IPO news, federal incentive shifts, and real-world contract wins.
234
+
235
+ ---
236
+
237
+
238
+ > **Metrics**
239
+ > Topic: `nuclear energy`
240
+ > Articles Collected: `60`
241
+ > Generated: `2025-06-03 11:52`
242
+ >
243
+ # Nuclear Energy: Value Investing Focus – Week Ending 2/June/2025
244
+
245
+ ---
246
+ ## Intro: Market Context and Week Summary
247
+
248
+ Nuclear energy took center stage this week, driven by major executive moves in U.S. energy policy, heightened demand from AI/data centers, and investor/VC excitement about SMRs (small modular reactors). With Trump’s administration rolling out pro-nuclear executive orders and Europe/Asia accelerating new builds, public and private capital is steadily shifting back into nuclear plays. The macro environment is bullish: regulatory timelines are shortening, capital support is rising, and energy stability/cleanliness place nuclear above wind and solar in AI-focused grid conversations. On the ground: several companies (including Oklo, BWX Technologies, and Centrus) received analyst upgrades, utilities are racing to deploy SMRs, and nuclear-tech startups are pulling in fresh VC funds. Smart money is watching supply chains (uranium), next-gen reactors, and infrastructure/enabling tech for nuclear’s new "golden age."
249
+
250
+ ---
251
+
252
+ ## 1. Key Value Signals
253
+
254
+ - **Major U.S. Policy Shift**: New Trump administration executive orders to accelerate nuclear tech approval, reduce permitting times and support uranium supply chains ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/), [Forbes](https://www.forbes.com/sites/llewellynking/2025/05/31/nuclear-golden-age-huge-potential-stubborn-obstacles/)).
255
+ - **Big Tech Partnership Moves**: Google (and earlier, Meta) inking first agreements with small modular reactor developers ([The Guardian](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)).
256
+ - **Startups & VC Funding Rounds**: Atomic Canyon (AI for nuclear), Kairos Power, and others drawing new funding ([Axios](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium), [TechCrunch](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)).
257
+ - **Utility Action on SMRs**: TVA becomes first U.S. utility to seek permit for SMR, indicating a path for future orders ([Insurance Journal](https://www.insurancejournal.com/news/southeast/2025/05/27/825158.htm)).
258
+ - **Analyst Upgrades and Insider Buys**: Oklo (OKLO), Centrus Energy (LEU), and BWX Technologies (BWXT) upgraded ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)).
259
+ - **Strong Fundamental Tailwinds**:
260
+ - **Low P/E, Strong ROE/FCF**: Several nuclear/uranium plays trading below market P/E, generating high free cash flow, with secular macro demand increases.
261
+ - **Moats Emerging**: Through regulatory complexity, IP, and public-private partnerships.
262
+
263
+ ---
264
+
265
+ ## 2. Stocks or Startups to Watch
266
+
267
+ ### **Listed Stocks**
268
+
269
+ #### **Oklo (OKLO)**
270
+ - **Trigger:** Analyst upgrades post-Trump nuclear EO, SMR play, strong U.S. government support ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/))
271
+ - **Fundamentals:** Newly public (<6 months), early FMC/S-1 data. Moat: First SMR in pipeline, government/tech sector contracts.
272
+ - **Metric:** Expected SMR deployment, contract pipeline not yet priced in.
273
+
274
+ #### **Centrus Energy (LEU)**
275
+ - **Trigger:** Upgraded, uranium supply chain play; critical to new U.S. nuclear push ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/))
276
+ - **P/E:** ~13 ([Yahoo Finance](https://finance.yahoo.com/quote/LEU/))
277
+ - **ROE:** ~27%
278
+ - **Market Cap:** ~$650M
279
+ - **Comment:** Only U.S. uranium enrichment capability, crucial as U.S. looks to de-risk from Russia ([Mining.com.au](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)).
280
+
281
+ #### **BWX Technologies (BWXT)**
282
+ - **Trigger:** Major reactor supplier for U.S. Navy and DoE, among first to benefit from process acceleration ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)).
283
+ - **P/E:** ~24
284
+ - **ROE:** ~35%
285
+ - **Moat:** Navy sole-source positioning, R&D, U.S. government contracts.
286
+ - **Market Cap:** ~$10B
287
+
288
+ #### **NuScale Power (SMR)**
289
+ - **Trigger:** NRC has approved SMR design, clearing path for deployment ([Utility Dive](https://www.utilitydive.com/news/nrc-approves-nuscale-small-modular-reactor-smr/749538/))
290
+ - **Metric:** High short interest post-IPO, but new regulatory tailwinds. Watch for major contract wins.
291
+
292
+ #### **Paladin Energy (PDN.AX)**
293
+ - **Trigger:** Making moves at Patterson Lake as uranium demand surges with U.S. and global SMR build ([Mining.com.au](https://mining.com.au/paladin-proceeds-at-patterson-lake/)).
294
+ - **Comment:** Undervalued relative to long-term uranium price upcycle.
295
+
296
+ ### **Startups & Undercapitalized Opportunities**
297
+
298
+ - **Atomic Canyon**: AI-powered B2B software for nuclear industry. Raised $7M seed led by Energy Impact Partners (backers of several energy unicorns). Aim: “ChatGPT for nuclear” ([TechCrunch](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/))
299
+
300
+ - **Kairos Power**: Leading small modular reactor startup; Google is the first customer for its future SMR output under a direct-purchase PPA ([The Guardian](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power))
301
+
302
+ - **Type One Energy**: Fusion startup; just completed its formal initial design review ([Power Magazine](https://www.powermag.com/avangrid-investing-41-million-to-rebuild-ny-grid-infrastructure/)).
303
+
304
+ ---
305
+
306
+ ## 3. What Smart Money Might Be Acting On
307
+
308
+ - **Venture/Institutional**: Top-tier VCs (Energy Impact Partners, Plug and Play, Tower Research) making preemptive moves into enabling tech/software (e.g., Atomic Canyon).
309
+ - **Corporate Power Users (Big Tech)**: Google, Meta inking deals with SMR startups—future demand signal for new nuclear ([The Guardian](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)).
310
+ - **Analyst Coverage/Upgrades**: William Blair’s initiation on OKLO, LEU, and BWXT signals Wall Street is waking up to regulatory + macro catalysts ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)).
311
+ - **Utilities/State Action**: TVA and Texas moving to lead SMR deployment and streamline permitting—possible template for state-federal partnerships ([Insurance Journal](https://www.insurancejournal.com/news/southeast/2025/05/27/825158.htm), [GovTech](https://www.govtech.com/products/texas-senate-passes-350m-grant-program-for-nuclear-power)).
312
+ - **Insider-Led Companies**: Centrus Energy (LEU, ex-government insiders, U.S.-centric contracts), Oklo (deep government, tech ecosystem relationships).
313
+
314
+ ---
315
+
316
+ ## 4. References/Sources
317
+
318
+ - [Forbes - U.S. must double down on nuclear](https://www.forbes.com/sites/billfrist/2025/05/29/powering-the-future-why-america-must-double-down-on-nuclear-energy/)
319
+ - [Forbes - Data Center Energy Wars](https://www.forbes.com/sites/ianpalmer/2025/05/27/gas-nuclear-renewables-battle-over-power-for-metas-new-data-center/)
320
+ - [The Guardian - Tech firms buy SMR power](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)
321
+ - [Investor's Business Daily - Nuclear stocks upgraded](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)
322
+ - [Axios - Atomic Canyon B2B seed](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)
323
+ - [TechCrunch - Atomic Canyon profile](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)
324
+ - [Insurance Journal - TVA SMR permit](https://www.insurancejournal.com/news/southeast/2025/05/27/825158.htm)
325
+ - [Utility Dive – NRC approves NuScale SMR design](https://www.utilitydive.com/news/nrc-approves-nuscale-small-modular-reactor-smr/749538/)
326
+ - [Mining.com.au – Centrus/Paladin/uranium momentum](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)
327
+ - [Yahoo Finance – LEU Key Stats](https://finance.yahoo.com/quote/LEU/)
328
+
329
+ ---
330
+
331
+ ## 5. Investment Hypothesis
332
+
333
+ **Thesis:**
334
+ Recent regulatory and policy catalysts have created a structural tailwind for both incumbent and next-gen nuclear energy firms, particularly those exposed to SMRs, uranium refining, and critical enabling tech/software. The current market underappreciates the scale and pace of coming capital inflows (from utilities, governments, and cloud/data-center majors). Valuations (esp. in uranium and contractors) remain attractive on a P/E and FCF basis compared to wind/solar.
335
+
336
+ - **Buy candidates:** Oklo (OKLO), Centrus (LEU), BWX Technologies (BWXT), Paladin (PDN.AX), NuScale (SMR)
337
+ - **Venture/early-exposure:** Consider gaining VC fund/PE exposure to emerging nuclear tech/software infrastructure (e.g., Atomic Canyon, Kairos Power).
338
+ - **Rationale:** U.S./global policy, increased AI power grid demand, and high barriers to entry combine for exceptional medium/long-term risk/reward—especially after this week’s “regime change” in sentiment and regulation.
339
+
340
+ **Monitor:**
341
+ New contract wins for SMR developers. U.S. uranium production and enrichment capacity (LEU). Expansion or new partnerships with tech/utility majors. Insider ownership trends and further analyst coverage for nuclear sector plays.
342
+
343
+ ---
344
+
345
+ ### Overall: This week’s news offers a clear “green light” for value investors in nuclear, particularly those seeking both deep value (LEU, BWXT) and long-tail growth via platform/SMR innovators (OKLO, Kairos, NuScale). U.S. government and major tech-firm endorsement serves as powerful affirmation for the sector’s re-rating.
346
+
347
+ ---
348
+
349
+ ---
350
+
351
+
352
+ > Topic: `AI`
353
+ > Articles Collected: `163`
354
+ > Generated: `2025-07-04 13:40`
355
+ >
356
+ # Value Investor AI Weekly Memo
357
+ **Week of June 30 – July 6, 2025**
358
+
359
+ ---
360
+
361
+ ## Market Sentiment & Trends
362
+
363
+ This week, the AI market continues to be characterized by robust *growth optimism* and *massive capital deployment*. Sentiment remains largely positive in infrastructure and applied AI, but there is rising skepticism toward sky-high private market valuations in some fast-following startups. Major headlines focus on AI’s influence in cybersecurity, legal, HR, and consulting verticals, as well as the continuing "picks and shovels" theme in datacenter hardware and services.
364
+ *No major regulatory shocks noted*, but institutions and investors are expressing caution about the sustainability of AI startup valuations and possible hype cycles.
365
+
366
+ ---
367
+
368
+ ## 1. Key Value Signals
369
+
370
+ - **Infrastructure Focus Remains Dominant:** The highest conviction for value investing is in AI infrastructure—hardware, datacenters, and core networking.
371
+ - **M&A and Partnership Activity:** Notable signals like Apple considering partnerships/acquisitions for Siri enhancements (Anthropic, OpenAI) and SoftBank moving aggressively on artificial superintelligence with multi-phase global projects.
372
+ - **Startup Capital Flows Accelerating:** Noteworthy rounds at Harvey ($300M Series E, legal AI), Abridge ($300M Series E, medical AI), Metaview ($35M, hiring), and Lovable ($150M rumored). However, most are at steep valuations (>$2B pre/post-money).
373
+ - **Insider & Smart Money Activity:** a16z, Kleiner Perkins, Coatue, Google Ventures, and Sequoia are active, with Glasswing Ventures discussing new AI funding strategies.
374
+ - **Geographic Expansion:** SoftBank’s ASI moves and Asia-centric “build with context” approach highlight a more sustainable, potentially undervalued new-entrant pipeline.
375
+
376
+ ---
377
+
378
+ ## 2. Stocks or Startups to Watch
379
+
380
+ ### **Public Markets:**
381
+ - **Arista Networks (ANET)**, **Nvidia (NVDA)**, **AMD (AMD):** “Picks and shovels” for the AI gold rush—datacenter, networking, compute chips. *Arista* has a lower valuation multiple than Nvidia, still strong ROE, and is less crowded.
382
+ - **SoftBank (SFTBY/SFTBF):** The push for "artificial superintelligence" signals heavy capital spend, but could be an undervalued play if execution improves and Vision Fund losses subside.
383
+ - **Apple (AAPL):** Movement on AI partnerships/acquisitions may re-rate Siri’s potential, although Apple trades rich by value standards.
384
+
385
+ ### **Private/Startup Watchlist:**
386
+ - **Harvey (Legal AI):** $5B valuation, but massive adoption potential for legal transformation; recently had consecutive mega-rounds—possibly ahead of fundamentals.
387
+ - **Abridge (Healthcare AI):** $5.3B valuation; automating medical notes is a real use-case, but valuation steep.
388
+ - **Metaview (Recruitment AI):** Google Ventures led; automating/bias-reducing hiring—smaller, earlier, potentially higher reward.
389
+ - **Lovable:** On track for $150M at $2B. Early-stage AI firm, unknown fundamentals, but worth tracking as a potential future public market debut.
390
+
391
+ ### **Infrastructure enablers:**
392
+ - **Scott Data (Private):** Midwest US data center, supporting AI startups—potential for M&A or IPO as picks-and-shovels to the AI startup wave.
393
+ - **Industrial/Manufacturing AI:** Watch industrial AI “digital twins” and multimodal analytics for less-flashy, but real, B2B moats.
394
+
395
+ ---
396
+
397
+ ## 3. What Smart Money Might Be Acting On
398
+
399
+ - **Private Market Rotation:** Top VCs (Kleiner Perkins, a16z, Coatue, Sequoia, Google Ventures) are doubling down on AI startups, but selectively—pivoting more to infrastructure, HR, and healthcare use-cases where actual adoption is measurable.
400
+ - **Datacenter & Networking Expansion:** Institutional and growth investors pushing into datacenter, network, and hardware plays over frothy model-chatbot proliferators.
401
+ - **“Asia Build” Angle:** Long-term capital weighs Asian AI execution models, where blitzscaling is shunned for capital efficiency. Early institutional allocation might offer less-overpriced entry into the next breakout AI winners.
402
+
403
+ ---
404
+
405
+ ## 4. References
406
+
407
+ - [Forbes: AI Hype Cycle & Infrastructure](https://www.forbes.com/sites/rscottraynovich/2025/07/01/inside-the-ai-hype-cycle-whats-next-for-enterprise-ai/)
408
+ - [RCR Wireless: SoftBank's Superintelligence Ambitions](https://www.rcrwireless.com/20250630/ai-infrastructure/softbank-artificial)
409
+ - [TechCrunch: Harvey, Abridge funding](https://techcrunch.com/2025/06/27/startups-weekly-tech-and-the-law/)
410
+ - [Startup Ecosystem Canada: Lovable AI funding](https://www.startupecosystem.ca/news/lovable-ai-startup-on-track-to-raise-150m-at-2b-valuation/)
411
+ - [GovTech: Scott Data, Omaha AI infrastructure partnership](https://www.govtech.com/artificial-intelligence/partnership-looks-to-drive-ai-adoption-in-omaha-neb)
412
+ - [Mining Technology: Industrial/Multimodal AI](https://www.mining-technology.com/sponsored/whats-next-for-industrial-ai-five-key-developments-shaping-the-space/)
413
+ - [Business Insider: Claude/Anthropic, Microsoft AI as a core workflow](https://www.businessinsider.com/claude-ran-store-anthropic-ai-agent-lessons-learned-middle-managers-2025-6)
414
+
415
+ ---
416
+
417
+ ## 5. Investment Hypothesis
418
+
419
+ **The market is in the mid-to-late innings of the first generative AI value cycle. Near-term value is likely to accrue to AI infrastructure enablers (datacenter, networking, compute), NOT to richly-priced flashy model startups. The next unlock is in B2B-specific verticals—manufacturing, healthcare, legal, hiring—especially those with defensible data or workflows (moats). Early-stage infrastructure providers outside the Bay Area (e.g., Midwest data centers, lower-multiple Asia AI shops) may offer underappreciated value. SoftBank’s renewed push and Apple’s partnership strategy suggest major future M&A, benefiting core AI tech and infrastructure players.**
420
+
421
+ ### **Screen for:**
422
+ - Public tech with strong fundamentals (low P/E, high ROE, cash flows) in critical infrastructure (Arista, AMD); a minimal ranking sketch follows this list
423
+ - Private companies with repeat-use, high-barrier products — notably in B2B SaaS, industrial, or privacy-compliant hiring/medtech AI
424
+ - Undercovered, smaller infrastructure shops and regional datacenter players (public or potential IPO/M&A targets)
425
+
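+ A minimal ranking sketch for this screen (the metric values are illustrative placeholders, not real fundamentals; in practice they would come from filings or a data provider):
+
+ ```python
+ # Rank the public names above by a crude value composite:
+ # earnings yield (1 / P/E) plus ROE. Placeholder numbers only.
+ import pandas as pd
+
+ candidates = pd.DataFrame(
+     [
+         {"ticker": "ANET", "pe": 30.0, "roe": 0.30},  # placeholder metrics
+         {"ticker": "AMD",  "pe": 28.0, "roe": 0.06},
+         {"ticker": "NVDA", "pe": 50.0, "roe": 0.90},
+     ]
+ ).set_index("ticker")
+
+ candidates["earnings_yield"] = 1 / candidates.pe
+ candidates["score"] = candidates.earnings_yield + candidates.roe
+ print(candidates.sort_values("score", ascending=False))
+ ```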
426
+ ---
427
+
428
+ **(Caveat: Recent startup valuations may be unsustainably high. Exercise discipline; seek evidence of unit economics and actual cashflow, not just growth metrics.)**
429
+
430
+ ---
431
+
432
+
433
+ > Topic: `nuclear energy`
434
+ > Articles Collected: `133`
435
+ > Generated: `2025-07-02 20:18`
436
+ >
437
+ # Nuclear Energy Weekly Value Investing Memo
438
+ **Week of July 1, 2025**
439
+
440
+ ---
441
+
442
+ ### **Market Sentiment & Trends**
443
+ This week’s news reconfirms nuclear energy’s rising status as both a grid reliability solution and a strategic utility for tech and industrial growth. Demand drivers include:
444
+ - Growing AI/data center needs (Google, Microsoft, Amazon heavily engaged)
445
+ - Policy tailwinds and new US DOE initiatives
446
+ - New partnerships and investments from leading tech and engineering firms
447
+ - Heightened urgency, both industrially and politically, for next-gen nuclear and advanced enrichment.
448
+
449
+ The overall sentiment is incrementally positive: there’s powerful momentum for nuclear expansion (especially advanced/small modular/fusion), but major regulatory, funding, and execution risks remain.
450
+
451
+ ---
452
+
453
+ ## 1. **Key Value Signals**
454
+
455
+ - **Big Tech Putting Capital to Work**: Google commits to buying electricity from both *fusion* (Commonwealth Fusion Systems) and *fission* (Kairos Power—an SMR startup), signaling a long-term offtake demand for clean nuclear output. These deals, while years out, anchor real business models and future cash flows in an industry where certainty has been rare.
456
+
457
+ - **DOE Fast-Tracks Advanced Nuclear**: The US Department of Energy (DOE) launched a pilot program to authorize *private* test reactors—removing a longstanding barrier for early-stage and test deployments. This regulatory facilitation could accelerate revenue opportunities for startups.
458
+
459
+ - **AI Meets Nuclear Construction**: Palantir—a leader in data analytics—announced its software will drive efficiency in reactor construction (with “The Nuclear Company”), signaling an ecosystem of digital infrastructure forming around new builds.
460
+
461
+ - **Strategic Collaborations**: Oklo (recent SPAC, high-profile leadership) and Bill Gates’ TerraPower signed a partnership around domestic HALEU enrichment—critical for next-generation reactors and a US supply chain play.
462
+
463
+ - **Major Fusion Funding**: Westinghouse and ITER sign a $180M contract to push fusion technology, while global fusion market size forecasts surge.
464
+
465
+ - **IPO and Recent SPAC Activity**: Oklo’s public listing, ongoing chatter around SMR startups seeking either funding or public exits.
466
+
467
+ ---
468
+
469
+ ## 2. **Stocks or Startups to Watch**
470
+
471
+ **A. Public/Recent IPO & Small Cap Opportunities**
472
+ - **Oklo (NYSE: OKLO)**
473
+ - **Profile**: Recent SPAC debut; backed by substantial leadership and Bill Gates’ circle via TerraPower collaboration.
474
+ - **Signals**: Strategic partnerships, domestic enrichment angle, close alignment with DOE pilot regulatory streamlining.
475
+ - **Check**: Valuation (historically rich for early-stage nuclear), business execution, and regulatory milestones.
476
+
477
+ - **Kairos Power (private, but IPO/speculation possible)**
478
+ - **Profile**: Small modular reactor company. Google offtake deal is a significant vote of confidence.
479
+ - **Signals**: Market validation, long-term revenue anchor (if plant comes online).
480
+
481
+ - **Commonwealth Fusion Systems (private)**
482
+ - **Profile**: Leading fusion startup; Google as an offtaker/investor.
483
+ - **Signals**: Earliest in its lifecycle, but with elite backing. Watch for pre-IPO funding rounds and cap table changes.
484
+
485
+ **B. Established, Undervalued Nuclear Plays (Check Valuation/Fundamentals)**
486
+ - **BWX Technologies (NYSE: BWXT)**
487
+ - **Profile**: Established supplier for nuclear reactors and specialized components.
488
+ - **Moat**: Deep US government/defense contracts, emerging advanced reactor supply role.
489
+ - **Valuation**: P/E ratio tends to be market-comparable, but free cash flow strong and recurring revenue profile.
490
+ - **Signal**: Exposure to multiple advanced reactor programs, SMR rollout, and robust political support.
491
+
492
+ - **Centrus Energy (NYSEMKT: LEU)**
493
+ - **Profile**: Only US public company with commercial uranium enrichment capability—potential HALEU winner.
494
+ - **Signals**: Vital for fueling advanced reactors; highly levered to new DOE policies.
495
+ - **Risks**: Small cap, volatile, but high convexity if advanced nuclear takes off in '26+.
496
+
497
+ **C. Infrastructure, EPC, and Software**
498
+ - **Palantir Technologies (NYSE: PLTR)**
499
+ - **Profile**: Now branching into nuclear with specialized construction/efficiency software.
500
+ - **Signal**: Long-term, stickier defense/critical infrastructure business.
501
+
502
+ ---
503
+
504
+ ## 3. **What Smart Money Might Be Acting On**
505
+
506
+ - **Pre-emptive Strategic Investment**: Major techs (Google especially) are locking in low-carbon electricity contracts before physical infrastructure is built. Early investor entry into fusion/SMR supply chains could offer “picks & shovels” asymmetry.
507
+
508
+ - **Pivot to Domestic Supply Chain**: Oklo/TerraPower collaboration for HALEU enrichment directly addresses “made in America” energy/defense policy. This is the tip of a deglobalization and re-onshoring trend—any US enrichment or SMR component supplier could be in play.
509
+
510
+ - **Software/Services Layer**: The nuclear restart will bring new opportunities for “enabling” firms: EPC (AECOM, AtkinsRéalis, Arup), digital twins/AI software (Palantir), and regulatory facilitators.
511
+
512
+ - **Advanced Reactor “First Movers”**: Policy support (DOE program) will favor companies close to deployment/breakthrough—those that can move from pilot to cash generation by 2026-2030. Early capital and regulatory champions could see premium returns.
513
+
514
+ ---
515
+
516
+ ## 4. **References**
517
+
518
+ - [Google’s Data Center Bets — TechCrunch](https://techcrunch.com/2025/07/01/googles-data-center-energy-use-doubled-in-four-years/)
519
+ - [US DOE Pilot Program — POWER Magazine](https://www.powermag.com/doe-pilot-program-targets-three-nuclear-test-reactors-for-2026-criticality-under-department-authorization/)
520
+ - [Palantir and Nuclear — POWER Magazine](https://www.powermag.com/groups-partnering-to-develop-ai-software-to-speed-nuclear-reactor-construction/)
521
+ - [Oklo/TerraPower/HALEU — Oil & Gas 360](https://www.oilandgas360.com/oklo-enters-strategic-collaborations-with-hexium-and-terrapower-to-launch-new-pathway-for-domestic-haleu-enrichment/)
522
+ - [Westinghouse/ITER Contract — POWER Magazine](https://www.powermag.com/westinghouse-iter-sign-180-million-contract-to-advance-nuclear-fusion/)
523
+ - [Fusion Market Outlook — Precedence Research](https://www.precedenceresearch.com/fusion-energy-market)
524
+ - [BWX Technologies (BWXT) — Investor Relations](https://www.bwxt.com/)
525
+
526
+ ---
527
+
528
+ ## 5. **Investment Hypothesis**
529
+
530
+ **Thesis**: The convergence of policy, technology (AI/data center demand), and strategic investment from leading corporates is catalyzing a new nuclear buildout cycle—especially in the US. *First-mover* advanced fission and fusion startups, US-centric enrichment supply, and key enabling technologies (digital/twin/AI/construction) stand to generate outsize returns, particularly ahead of confirmed revenue streams in the early 2030s.
531
+
532
+ - **Core Bets**:
533
+ - **Oklo** — if price corrects — offers a uniquely exposed pure play on the regulatory shift and DOE pilot program.
534
+ - **Centrus Energy** — levered, high-risk/high-reward play on domestic HALEU enrichment.
535
+ - **BWX Technologies** — lower-risk, steady exposure to SMR and advanced builds, and possible defense tailwinds.
536
+
537
+ - **Venture/Aggressive**:
538
+ - Track private rounds (Commonwealth Fusion, Kairos Power); watch for IPO or secondary liquidity events.
539
+ - Monitor “picks and shovels” suppliers (engineering, digital, sensing, permitting).
540
+
541
+ - **Catalysts**:
542
+ - DOE pilot selections and project starts (late 2025/2026).
543
+ - Google/Microsoft/other tech-driven PPAs or partnerships.
544
+ - US and UK regulatory acceleration or major political support.
545
+
546
+ **Risks**: Execution slippage, cost overruns, regulatory reversals, or overhyped/illiquid microcaps. Fusion commercial viability remains >5-7 years out.
547
+
548
+ ---
549
+
550
+ # **Summary Table**
551
+
552
+ | Company | Ticker | Opportunity | Moat/Signal | Notes |
553
+ |------------------------|--------|------------------------|-----------------------------------|--------------------------------------------|
554
+ | Oklo | OKLO | Early pure play SMR | DOE pilot, TerraPower partnership | SPAC, recent, monitor valuation carefully |
555
+ | Centrus Energy | LEU | HALEU enrichment | Only US-capable, DOE contracts | High volatility |
556
+ | BWX Technologies | BWXT | Established supplier | Govt defense, recurring revenue | Steady, strong FCF & fundamentals |
557
+ | Commonwealth Fusion | – | Fusion, Google backing | Tech, strategic capital | Private, pre-IPO/2nd round watching |
558
+ | Kairos Power | – | SMR, Google offtake | Major tech validation | Private, track for IPO |
559
+ | Palantir Technologies | PLTR | Nuclear AI/software | 1st big software entrant | Not a pure play, watch ecosystem effects |
560
+
561
+ ---
562
+
563
+ ## **Bottom Line:**
564
+ *The investable landscape for nuclear is evolving rapidly—value investors should focus on companies bridging policy tailwind into real commercial assets, with an eye for US-centric supply, strategic contracts, and digital enablement of an emerging nuclear buildout cycle. Small/underfunded public names could offer asymmetric re-rating as the cycle unfolds.*
565
+
566
+ ---
567
+
568
+ ![nuclear energy](https://images.unsplash.com/photo-1630142895963-6996ae6b3a5b?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3w1NzIzMjF8MHwxfHNlYXJjaHwxfHxudWNsZWFyJTIwZW5lcmd5fGVufDB8MHx8fDE3NDg5MzM1NDJ8MA&ixlib=rb-4.1.0&q=80&w=1080)
569
+
570
+ *Photo by <a href="https://unsplash.com/@llehotsky" target="_blank">Lukáš Lehotský</a> on <a href="https://unsplash.com" target="_blank">Unsplash</a>*
571
+
572
+
573
+ > **🧠 Metrics**
574
+ > - Topic: `nuclear energy`
575
+ > - Articles Collected: `69`
576
+ > - Generated: `2025-06-03 10:10`
577
+ >
578
+ # Nuclear Energy Value Investing Memo – Week Ending June 2, 2025
579
+
580
+ ## General Situation & Market Summary
581
+
582
+ This week marks a decisive shift for nuclear energy, fueled by sweeping pro-nuclear executive orders from the Trump administration, robust bipartisan support at the state and federal levels, and increased corporate demand from hyperscale data center operators such as Meta and Google [[1](https://www.forbes.com/sites/billfrist/2025/05/29/powering-the-future-why-america-must-double-down-on-nuclear-energy/)][[2](https://www.forbes.com/sites/ianpalmer/2025/05/27/gas-nuclear-renewables-battle-over-power-for-metas-new-data-center/)][[3](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)]. The "nuclear renaissance" is manifest in regulatory accelerations, increased federal and state funding, and strategic contracts with Big Tech. Notably, the news cycle includes upgrades for nuclear stocks, significant venture funding rounds for AI-driven nuclear ventures, and government-backed SMR builds—plus ripple effects for upstream uranium miners.
583
+
584
+ **Market sentiment** is bullish on nuclear equities and technology providers. There's tangible momentum pouring into both legacy and disruptive names (especially SMR- and AI-aligned startups), although investors should note that capital costs and regulatory delays remain stubborn risks.
585
+
586
+ ---
587
+
588
+ ## 1. Key Value Signals
589
+
590
+ - **Executive Tailwinds:** New Trump EOs support accelerated licensing, funding, and uranium supply chain resiliency; structural regulatory barriers eased for new builds [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)][[9](https://www.forbes.com/sites/llewellynking/2025/05/31/nuclear-golden-age-huge-potential-stubborn-obstacles/)].
591
+ - **State grants and approvals:** Texas passed a $350M nuclear grant program [[6](https://www.govtech.com/products/texas-senate-passes-350m-grant-program-for-nuclear-power)].
592
+ - **Strategic partnerships and PPAs:** Google and Meta sign nuclear PPA deals; Kairos Power (private SMR leader) lands deals with Big Tech [[3](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power/)].
593
+ - **Startups funded:** Atomic Canyon (AI for nuclear ops) closes $7M seed; strong VC and founder backing [[11](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)].
594
+ - **Stock Upgrades:** Oklo (OKLO), Centrus Energy (LEU), BWX Technologies (BWXT) upgraded by William Blair, explicitly tied to presidential actions [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)].
595
+ - **Uranium supply buzz:** Direct commentary from GTI Energy (ASX:GTR; uranium) spotlights bullish uranium price/volume thesis [[16](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)].
596
+ - **Tech-enabled nuclear:** Multiple deals for SMR technologies, digital AI ops, and nuclear for maritime/data infrastructure.
597
+
598
+
599
+ ---
600
+
601
+ ## 2. Stocks or Startups to Watch
602
+
603
+ ### Upgraded or in Play
604
+
605
+ #### Oklo (NASDAQ: OKLO) [Startup, Recent IPO]
606
+ - **What:** Microreactor/SMR company — major White House and sector tailwinds, newly public.
607
+ - **Catalyst:** Upgraded post-Trump EO; top beneficiary per analysts.
608
+ - **Valuation:** Pre-revenue, but tech moat and strategic government/energy partners.
609
+ - **Insider/Smart Money:** Backed by Sam Altman, Peter Thiel [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)].
610
+
611
+ #### Centrus Energy (AMEX: LEU)
612
+ - **What:** Uranium fuel supplier with US-centric value.
613
+ - **Metrics:** P/E ~11, P/B ~2, ROE ~22%; Market Cap ~$1.2B.
614
+ - **Catalyst:** Government support for US supply, upgraded by analysts.
615
+ - **Moat:** Key domestic enrichment capability.
616
+ - [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)], [[17](https://markets.ft.com/data/announce/detail?dockey=600-202505291748PR_NEWS_USPRX____PH99387-1)]
617
+
618
+ #### BWX Technologies (NYSE: BWXT)
619
+ - **What:** Reactors for US Navy (defense moat) & utilities.
620
+ - **Metrics:** P/E ~25, P/B ~5.8, ROE ~36%, Market Cap ~$8.6B.
621
+ - **Catalyst:** Upgrade on presidential support, huge federal contracts.
622
+ - [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)]
623
+
624
+ #### GTI Energy (ASX: GTR)
625
+ - **What:** Small-cap uranium developer, "uranium buzz" name.
626
+ - **Catalyst:** Publicly lauded tailwinds by CEO, levered to US uranium push.
627
+ - [[16](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)]
628
+
629
+ ### High-Impact Startups
630
+
631
+ #### Atomic Canyon (Private)
632
+ - **What:** AI for nuclear compliance, ops, and maintenance (B2B SaaS).
633
+ - **Catalyst:** Landed Diablo Canyon (major US plant) as client, $7M seed from Energy Impact Partners, Commonweal, Plug and Play, Tower Research, Wischoff.
634
+ - **Signal:** Well-connected investors, strategic bridge between AI and nuclear infra.
635
+ - [[11](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)], [[12](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)]
636
+
637
+ #### Kairos Power (Private)
638
+ - **What:** US SMR developer, Google’s first SMR PPA.
639
+ - **Catalyst:** Strategic proof-point for SMR commercialization, signaling major institutional validation.
640
+ - [[3](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power/)]
641
+
642
+ ---
643
+
644
+ ## 3. What Smart Money Might Be Acting On
645
+
646
+ - **Venture backers:** Energy Impact Partners, Plug and Play, Tower Research are betting on Atomic Canyon, validating AI’s inevitable role in nuclear digitization [[12](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)].
647
+ - **Insider investors:** Sam Altman, Peter Thiel, and other Silicon Valley luminaries are aligned to Oklo, a sign of big-ticket belief in next-gen reactors [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)].
648
+ - **Tech majors:** Google (via SMR PPA with Kairos Power) and Meta (exploring nuclear for data centers) are unlikely to backtrack — durable, volume offtake validation [[3](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power/)], [[2](https://www.forbes.com/sites/ianpalmer/2025/05/27/gas-nuclear-renewables-battle-over-power-for-metas-new-data-center/)].
649
+ - **Active upgrades:** William Blair and others raising targets for BWXT, LEU, and OKLO immediately after White House/regulatory actions [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)].
650
+
651
+ ---
652
+
653
+ ## 4. References
654
+
655
+ - [Forbes: “Why America Must Double Down On Nuclear Energy”](https://www.forbes.com/sites/billfrist/2025/05/29/powering-the-future-why-america-must-double-down-on-nuclear-energy/)
656
+ - [Forbes: “Gas, Nuclear, Renewables Battle Over Power For Meta’s New Data Center”](https://www.forbes.com/sites/ianpalmer/2025/05/27/gas-nuclear-renewables-battle-over-power-for-metas-new-data-center/)
657
+ - [The Guardian: “Tide turning in Europe and beyond in favour of nuclear power”](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)
658
+ - [Investor's Business Daily: “Trump's 'Consequential' Shift In Energy Policy Fuels Upgrades For These Nuclear Stocks”](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)
659
+ - [GovTech: “Texas Senate Passes $350M Grant Program for Nuclear Power”](https://www.govtech.com/products/texas-senate-passes-350m-grant-program-for-nuclear-power)
660
+ - [TechCrunch: “Atomic Canyon wants to be ChatGPT for the nuclear industry”](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)
661
+ - [Axios: Venture deal coverage](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)
662
+ - [Mining.com.au: “Trump’s nuclear push ignites uranium buzz”](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)
663
+ - [Centrus company announcement](https://markets.ft.com/data/announce/detail?dockey=600-202505291748PR_NEWS_USPRX____PH99387-1)
664
+ - [Insurance Journal: TVA/SMR permit news](https://www.insurancejournal.com/news/southeast/2025/05/27/825158.htm)
665
+
666
+ ---
667
+
668
+ ## 5. Investment Hypothesis
669
+
670
+ The current newsflow marks a **structural inflection point for nuclear energy in the US and allied markets**. Catalyst stacking — from bipartisan support, federal and state grants, White House executive orders, to urgent demand from hyperscale data centers and defense — is driving multiple fundamental and trigger events:
671
+
672
+ - **Oklo (OKLO):** Early-stage, speculative but with tech and regulatory moats, institutional and insider backing, and direct ties to US policy. Potential 5–10x if it achieves early commercial milestones.
673
+ - **Centrus Energy (LEU):** Profitable, unique “picks and shovels” play on US fuel sovereignty, undervalued relative to new cash flows and policy tailwinds.
674
+ - **BWX Technologies (BWXT):** Mid-/large cap with recession-resistant defense and civil reactor businesses; ideal for institutional portfolios seeking balance.
675
+ - **Atomic Canyon:** Private, but a “future pick-and-shovel” for digital ops in nuclear—evidence of VC smart money converging on the sector.
676
+
677
+ **Downside risks:** Regulatory overhangs, cost overruns, and safety/lobbying backlash could impede rapid nuclear scaling—tempering parabolic runs.
678
+
679
+ **Conclusion:**
680
+ **This week’s news cements nuclear as a durable, high-growth infrastructure theme for the next decade with both policy and institutional tailwinds.** Well-run, undervalued or newly upgraded public nuclear stocks—especially with alignment to supply (LEU), defense (BWXT), and innovative new build (OKLO)—present strong upside. Meanwhile, closely follow VC and Big Tech’s footprints for future SMR and AI-software-linked deals.
681
+
682
+ ---
683
+
684
+ **Summary Table: Potential Picks**
685
+
686
+ | Company | Ticker | Market Cap | P/E | ROE | Catalyst |
687
+ | --------------- | ------ | ------------ | ----- | ----- | --------------------------- |
688
+ | Oklo | OKLO | ~$560M | — | — | SMR, gov/insider backing |
689
+ | Centrus Energy | LEU | ~$1.2B | ~11 | ~22% | Uranium, analyst upgrades |
690
+ | BWX Technologies| BWXT | ~$8.6B | ~25 | ~36% | Defense, U.S. Navy, gov’t |
691
+ | GTI Energy | GTR | ~$40M (AUD) | — | — | Uranium, U.S. expansion |
692
+ | Atomic Canyon   | —      | Private      | —     | —     | AI SaaS, Diablo Canyon win  |
693
+ | Kairos Power | — | Private | — | — | Google SMR PPA |
694
+
695
+ *Data based on latest available annual/quarterly filings and estimates; a quick consistency check on these multiples follows.*
696
+
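+ As a sanity check, the table's LEU figures hang together (a minimal sketch using the approximate numbers quoted above):
+
+ ```python
+ # Back out implied earnings and book value for Centrus (LEU) from the
+ # table's approximate figures: Market Cap ~$1.2B, P/E ~11, ROE ~22%.
+ market_cap = 1.2e9  # ~$1.2B
+ pe = 11             # ~11x
+ roe = 0.22          # ~22%
+
+ earnings = market_cap / pe            # ~$109M implied net income
+ book_value = earnings / roe           # ~$496M implied equity
+ implied_pb = market_cap / book_value  # ~2.4x, close to the P/B ~2 cited earlier
+
+ print(f"implied earnings ~${earnings / 1e6:.0f}M")
+ print(f"implied book value ~${book_value / 1e6:.0f}M")
+ print(f"implied P/B ~{implied_pb:.1f}x")
+ ```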
697
+ ---
698
+
699
+ ---
700
+
data/mining_2025-07-04.md ADDED
@@ -0,0 +1,89 @@
1
+
2
+ > Topic: `Mining`
3
+ > Articles Collected: `172`
4
+ > Generated: `2025-07-04 14:17`
5
+ >
6
+ # Weekly Value Investing Memo: Mining Sector (June 28 - July 4, 2025)
7
+
8
+ ---
9
+
10
+ ## Overview: Sentiment & Market Trends
11
+
12
+ The mining industry sees a highly dynamic week, marked by continued innovation in automation, M&A activity in minerals (especially gold and copper), and further fundraising across both established players and small caps. Macro themes include:
13
+ - **Global demand for critical minerals (EVs, batteries) fueling competition for assets**.
14
+ - **Rising capital investment driven by gold's record rally** and the green transition’s need for base metals.
15
+ - **Persistently challenging regulatory and policy environments**, particularly in emerging markets (Nigeria, South Africa).
16
+ - **Strong capital flows into mining equipment and automation**, hinting at structurally higher sector profitability for lean, tech-driven operators.
17
+
18
+ ---
19
+
20
+ ## 1. Key Value Signals
21
+
22
+ - **High insider/institutional participation in miners and suppliers**: Examples include Cascadia Minerals’ successful oversubscribed placement and large-scale deals like Zijin’s $1.2B Kazakh gold mine buy.
23
+ - **Sector consolidation**: Small/mid-cap mergers (Brightstar-Aurumin talks) may unlock scale and cost synergies, often a value signal.
24
+ - **Strategic acquisitions and funding rounds in upstream enablers**: Notably, Terra CO2’s $124M for green cement and the University of Queensland’s training program reflect future-oriented bets.
25
+ - **Investor interest in mining equipment, automation, and sustainability**: Industry reports highlight accelerating tech adoption, with larger miners seeking cost controls and ESG advantages.
26
+ - **Government and regulatory headwinds an explicit risk**: Policy ambiguity or hostile regimes (Nigeria, South Africa) remain a clear negative screen.
27
+
28
+ ---
29
+
30
+ ## 2. Stocks or Startups to Watch
31
+
32
+ ### **Cascadia Minerals Ltd. [TSXV: CAM]**
33
+ - **Event**: Raised C$2.27M in oversubscribed private placement for the acquisition of Granite Creek Copper and funding of the Carmacks Project.
34
+ - **Value Angle**: Small-cap, significant insider demand, focus on copper (structural under-supply theme). Acquisition could add scale and resource upside.
35
+ - **Fundamentals**: Pre-revenue, but well-capitalized, strategic Yukon assets, optionality on copper cycle. Monitor P/B and dilution risk post-acquisition.
36
+
37
+ ### **Terra CO2**
38
+ - **Event**: Secured $124M Series B for low-carbon cement product.
39
+ - **Value Angle**: Downstream of mining (aggregates, cement); sits at the ESG/green infra nexus. Major institutional support signals sector-wide bet on carbon reduction in heavy industry.
40
+ - **Fundamentals**: Still private, but possible IPO watch for first-mover “green cement” plays with mining tie-ins.
41
+
42
+ ### **Brightstar Resources [ASX: BTR] / Aurumin [ASX: AUN]**
43
+ - **Event**: Potential merger under negotiation (Central Sandstone, WA gold tenements).
44
+ - **Value Angle**: Sector consolidation at the small-cap level; possible cost reduction, resource optimization. Neither yet a sector leader but could unlock scale economics if deal completes.
45
+ - **Fundamentals**: Consider on basis of NAV discount, debt levels, historic cash burn.
46
+
47
+ ### **Zijin Mining [HKG: 2899]**
48
+ - **Event**: $1.2B acquisition of a Kazakh gold mine; pursuing HK listing of international assets.
49
+ - **Value Angle**: Massive balance sheet, levered to gold, aggressive expansion. Not classic ‘cheap’ value, but a play on size/moat, Chinese state alignment, and precious metals bull run.
50
+
51
+ ---
52
+
53
+ ## 3. What Smart Money Might Be Acting On
54
+
55
+ - **Resource-constrained supply chains**: Institutions chasing assets in the copper, gold, and specialty metals space for long-term price support; Cascadia’s oversubscribed raise hints at smart capital flow into critical minerals.
56
+ - **Green and tech-enabled mining infrastructure**: Funds flowing into equipment/automation as large miners invest to cut OPEX and meet sustainability mandates.
57
+ - **Early-stage innovation bets**: University/industry collabs (Wheaton/UBC, MRIWA scholarships) suggest VC/PE will chase enabling tech, not just resource ownership.
58
+ - **Selective asset consolidation**: Sophisticated holders may see sub-scale gold/copper/junior plays as efficient entry points during cyclical troughs or when M&A premiums are small.
59
+ - **Avoidance of poorly governed or policy-risked geographies**: Smart money is likely avoiding high regulatory risk countries (Nigeria, South Africa) unless assets are truly world-class.
60
+
61
+ ---
62
+
63
+ ## 4. References
64
+
65
+ - [Cascadia Minerals oversubscribed financing (TipRanks)](https://www.tipranks.com/news/company-announcements/cascadia-minerals-secures-c2-27m-in-oversubscribed-financing-for-strategic-acquisition)
66
+ - [Terra CO2 $124M Series B (Startup Ecosystem Canada)](https://www.startupecosystem.ca/news/terra-co2-secures-124m-series-b-for-low-carbon-cement/)
67
+ - [Zijin Mining’s Kazakh gold mine buy (Mining.com)](https://www.mining.com/web/zijin-mining-to-acquire-kazakh-gold-mine-in-1-2b-deal/)
68
+ - [Brightstar Resources/Aurumin merger discussion (Mining.com.au)](https://mining.com.au/brightstar-probes-aurumin-merger-discussions/)
69
+ - [Mining equipment/automation market surge (openPR)](https://www.openpr.com/news/4092566/global-mining-equipment-market-surges-amid-automation-green)
70
+ - [Wheaton $1M Future of Mining Challenge (Mining.com)](https://www.mining.com/blog/wheaton-precious-metals-brings-back-1m-future-of-mining-challenge)
71
+
72
+ ---
73
+
74
+ ## 5. Investment Hypothesis
75
+
76
+ The mining sector is entering another phase of capital allocation discipline, flagged by:
77
+ - Minor vs. major M&A as juniors combine to gain efficiency,
78
+ - Technology/adoption waves as automation and green mandates favor opex-lean operators,
79
+ - Smart money preferring critical minerals, automation, and ESG-enabled suppliers,
80
+ - And value found in overlooked small-caps pursuing strategic, low-cost acquisitions (see Cascadia Minerals), especially those exposed to copper/gold.
81
+
82
+ Mining’s cyclical, capital-intensive nature means margins will accrue to firms with solid moats (resource quality, cost, governance). The best value is likely among well-financed, proven junior miners with clear catalysts (M&A, new discoveries, scale), or private enablers with a roadmap to public markets (like Terra CO2).
83
+
84
+ Regulatory and macro risks (e.g., policy instability in Nigeria/South Africa) make jurisdiction and balance sheet strength paramount for downside protection. Investors should use screeners (P/E, P/B, ROE, FCF) to filter for relative value, but back this with assessment of jurisdictional and operational risk.
85
+
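+ A minimal sketch of such a screen, treating jurisdiction as a hard filter rather than just another ratio (all rows are illustrative placeholders, not real data):
+
+ ```python
+ # Ratio screen plus jurisdiction filter, per the note above.
+ # All rows are illustrative placeholders, not real fundamentals.
+ import pandas as pd
+
+ miners = pd.DataFrame(
+     [
+         {"name": "JuniorCopperCo",   "pe": 9.0,  "pb": 0.8, "roe": 0.18,  "fcf_pos": True,  "risk": "low"},
+         {"name": "GoldStartupInc",   "pe": None, "pb": 2.5, "roe": -0.05, "fcf_pos": False, "risk": "low"},
+         {"name": "FrontierMinerals", "pe": 4.0,  "pb": 0.5, "roe": 0.25,  "fcf_pos": True,  "risk": "high"},
+     ]
+ )
+
+ # Hard filter first: drop high-regulatory-risk geographies regardless of ratios.
+ safe = miners[miners.risk != "high"]
+ # Then the relative-value screen on the survivors.
+ value = safe[safe.pe.notna() & (safe.pe < 12) & (safe.pb < 1.5) & (safe.roe > 0.10) & safe.fcf_pos]
+ print(value)
+ ```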
86
+ ---
87
+
88
+ ### Conclusion:
89
+ **Watch for follow-on financings, consolidation deals, and private-to-public transition among mining innovators and critical mineral players. Prioritize companies with clear capital discipline, high insider/institutional ownership, and strong strategic rationale for growth.**
data/nuclear_energy_2025-06-03.md ADDED
@@ -0,0 +1,130 @@
1
+ ![nuclear energy](https://images.unsplash.com/photo-1630142895963-6996ae6b3a5b?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3w1NzIzMjF8MHwxfHNlYXJjaHwxfHxudWNsZWFyJTIwZW5lcmd5fGVufDB8MHx8fDE3NDg5MzM1NDJ8MA&ixlib=rb-4.1.0&q=80&w=1080)
2
+
3
+ *Photo by <a href="https://unsplash.com/@llehotsky" target="_blank">Lukáš Lehotský</a> on <a href="https://unsplash.com" target="_blank">Unsplash</a>*
4
+
5
+
6
+ > **🧠 Metrics**
7
+ > - Topic: `nuclear energy`
8
+ > - Articles Collected: `69`
9
+ > - Generated: `2025-06-03 10:10`
10
+ >
11
+ # Nuclear Energy Value Investing Memo – Week Ending June 2, 2025
12
+
13
+ ## General Situation & Market Summary
14
+
15
+ This week marks a decisive shift for nuclear energy, fueled by sweeping pro-nuclear executive orders from the Trump administration, robust bipartisan support at the state and federal levels, and increased corporate demand from hyperscale data center operators such as Meta and Google [[1](https://www.forbes.com/sites/billfrist/2025/05/29/powering-the-future-why-america-must-double-down-on-nuclear-energy/)][[2](https://www.forbes.com/sites/ianpalmer/2025/05/27/gas-nuclear-renewables-battle-over-power-for-metas-new-data-center/)][[3](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)]. The "nuclear renaissance" is manifest in regulatory accelerations, increased federal and state funding, and strategic contracts with Big Tech. Notably, the news cycle includes upgrades for nuclear stocks, significant venture funding rounds for AI-driven nuclear ventures, and government-backed SMR builds—plus ripple effects for upstream uranium miners.
16
+
17
+ **Market sentiment** is bullish on nuclear equities and technology providers. There's tangible momentum pouring into both legacy and disruptive names (especially SMR- and AI-aligned startups), although investors should note that capital costs and regulatory delays remain stubborn risks.
18
+
19
+ ---
20
+
21
+ ## 1. Key Value Signals
22
+
23
+ - **Executive Tailwinds:** New Trump EOs support accelerated licensing, funding, and uranium supply chain resiliency; structural regulatory barriers eased for new builds [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)][[9](https://www.forbes.com/sites/llewellynking/2025/05/31/nuclear-golden-age-huge-potential-stubborn-obstacles/)].
24
+ - **State grants and approvals:** Texas passed a $350M nuclear grant program [[6](https://www.govtech.com/products/texas-senate-passes-350m-grant-program-for-nuclear-power)].
25
+ - **Strategic partnerships and PPAs:** Google and Meta sign nuclear PPA deals; Kairos Power (private SMR leader) lands deals with Big Tech [[3](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power/)].
26
+ - **Startups funded:** Atomic Canyon (AI for nuclear ops) closes $7M seed; strong VC and founder backing [[11](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)].
27
+ - **Stock Upgrades:** Oklo (OKLO), Centrus Energy (LEU), BWX Technologies (BWXT) upgraded by William Blair, explicitly tied to presidential actions [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)].
28
+ - **Uranium supply buzz:** Direct commentary from GTI Energy (ASX:GTR; uranium) spotlights bullish uranium price/volume thesis [[16](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)].
29
+ - **Tech-enabled nuclear:** Multiple deals for SMR technologies, digital AI ops, and nuclear for maritime/data infrastructure.
30
+
31
+
32
+ ---
33
+
34
+ ## 2. Stocks or Startups to Watch
35
+
36
+ ### Upgraded or in Play
37
+
38
+ #### Oklo (NASDAQ: OKLO) [Startup, Recent IPO]
39
+ - **What:** Microreactor/SMR company — major White House and sector tailwinds, newly public.
40
+ - **Catalyst:** Upgraded post-Trump EO; top beneficiary per analysts.
41
+ - **Valuation:** Pre-revenue, but tech moat and strategic government/energy partners.
42
+ - **Insider/Smart Money:** Backed by Sam Altman, Peter Thiel [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)].
43
+
44
+ #### Centrus Energy (AMEX: LEU)
45
+ - **What:** Uranium fuel supplier with US-centric value.
46
+ - **Metrics:** P/E ~11, P/B ~2, ROE ~22%; Market Cap ~$1.2B.
47
+ - **Catalyst:** Government support for US supply, upgraded by analysts.
48
+ - **Moat:** Key domestic enrichment capability.
49
+ - [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)], [[17](https://markets.ft.com/data/announce/detail?dockey=600-202505291748PR_NEWS_USPRX____PH99387-1)]
50
+
51
+ #### BWX Technologies (NYSE: BWXT)
52
+ - **What:** Reactors for US Navy (defense moat) & utilities.
53
+ - **Metrics:** P/E ~25, P/B ~5.8, ROE ~36%, Market Cap ~$8.6B.
54
+ - **Catalyst:** Upgrade on presidential support, huge federal contracts.
55
+ - [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)]
56
+
57
+ #### GTI Energy (ASX: GTR)
58
+ - **What:** Small-cap uranium developer, "uranium buzz" name.
59
+ - **Catalyst:** Publicly lauded tailwinds by CEO, levered to US uranium push.
60
+ - [[16](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)]
61
+
62
+ ### High-Impact Startups
63
+
64
+ #### Atomic Canyon (Private)
65
+ - **What:** AI for nuclear compliance, ops, and maintenance (B2B SaaS).
66
+ - **Catalyst:** Landed Diablo Canyon (major US plant) as client, $7M seed from Energy Impact Partners, Commonweal, Plug and Play, Tower Research, Wischoff.
67
+ - **Signal:** Well-connected investors, strategic bridge between AI and nuclear infra.
68
+ - [[11](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)], [[12](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)]
69
+
70
+ #### Kairos Power (Private)
71
+ - **What:** US SMR developer, Google’s first SMR PPA.
72
+ - **Catalyst:** Strategic proof-point for SMR commercialization, signaling major institutional validation.
73
+ - [[3](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power/)]
74
+
75
+ ---
76
+
77
+ ## 3. What Smart Money Might Be Acting On
78
+
79
+ - **Venture backers:** Energy Impact Partners, Plug and Play, Tower Research are betting on Atomic Canyon, validating AI’s inevitable role in nuclear digitization [[12](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)].
80
+ - **Insider investors:** Sam Altman, Peter Thiel, and other Silicon Valley luminaries are aligned to Oklo, a sign of big-ticket belief in next-gen reactors [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)].
81
+ - **Tech majors:** Google (via SMR PPA with Kairos Power) and Meta (exploring nuclear for data centers) are unlikely to backtrack — durable, volume offtake validation [[3](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power/)], [[2](https://www.forbes.com/sites/ianpalmer/2025/05/27/gas-nuclear-renewables-battle-over-power-for-metas-new-data-center/)].
82
+ - **Active upgrades:** William Blair and others raising targets for BWXT, LEU, and OKLO immediately after White House/regulatory actions [[4](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)].
83
+
84
+ ---
85
+
86
+ ## 4. References
87
+
88
+ - [Forbes: “Why America Must Double Down On Nuclear Energy”](https://www.forbes.com/sites/billfrist/2025/05/29/powering-the-future-why-america-must-double-down-on-nuclear-energy/)
89
+ - [Forbes: “Gas, Nuclear, Renewables Battle Over Power For Meta’s New Data Center”](https://www.forbes.com/sites/ianpalmer/2025/05/27/gas-nuclear-renewables-battle-over-power-for-metas-new-data-center/)
90
+ - [The Guardian: “Tide turning in Europe and beyond in favour of nuclear power”](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)
91
+ - [Investor's Business Daily: “Trump's 'Consequential' Shift In Energy Policy Fuels Upgrades For These Nuclear Stocks”](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)
92
+ - [GovTech: “Texas Senate Passes $350M Grant Program for Nuclear Power”](https://www.govtech.com/products/texas-senate-passes-350m-grant-program-for-nuclear-power)
93
+ - [TechCrunch: “Atomic Canyon wants to be ChatGPT for the nuclear industry”](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)
94
+ - [Axios: Venture deal coverage](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)
95
+ - [Mining.com.au: “Trump’s nuclear push ignites uranium buzz”](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)
96
+ - [Centrus company announcement](https://markets.ft.com/data/announce/detail?dockey=600-202505291748PR_NEWS_USPRX____PH99387-1)
97
+ - [Insurance Journal: TVA/SMR permit news](https://www.insurancejournal.com/news/southeast/2025/05/27/825158.htm)
98
+
99
+ ---
100
+
101
+ ## 5. Investment Hypothesis
102
+
103
+ The current newsflow marks a **structural inflection point for nuclear energy in the US and allied markets**. Catalyst stacking — from bipartisan support, federal and state grants, White House executive orders, to urgent demand from hyperscale data centers and defense — is driving multiple fundamental and trigger events:
104
+
105
+ - **Oklo (OKLO):** Early-stage, speculative but with tech and regulatory moats, institutional and insider backing, and direct ties to US policy. Potential 5–10x if it achieves early commercial milestones.
106
+ - **Centrus Energy (LEU):** Profitable, unique “picks and shovels” play on US fuel sovereignty, undervalued relative to new cash flows and policy tailwinds.
107
+ - **BWX Technologies (BWXT):** Mid-/large cap with recession-resistant defense and civil reactor businesses; ideal for institutional portfolios seeking balance.
108
+ - **Atomic Canyon:** Private, but a “future pick-and-shovel” for digital ops in nuclear—evidence of VC smart money converging on the sector.
109
+
110
+ **Downside risks:** Regulatory overhangs, cost overruns, and safety/lobbying backlash could impede rapid nuclear scaling—tempering parabolic runs.
111
+
112
+ **Conclusion:**
113
+ **This week’s news cements nuclear as a durable, high-growth infrastructure theme for the next decade with both policy and institutional tailwinds.** Well-run, undervalued or newly upgraded public nuclear stocks—especially with alignment to supply (LEU), defense (BWXT), and innovative new build (OKLO)—present strong upside. Meanwhile, closely follow VC and Big Tech’s footprints for future SMR and AI-software-linked deals.
114
+
115
+ ---
116
+
117
+ **Summary Table: Potential Picks**
118
+
119
+ | Company | Ticker | Market Cap | P/E | ROE | Catalyst |
120
+ | --------------- | ------ | ------------ | ----- | ----- | --------------------------- |
121
+ | Oklo | OKLO | ~$560M | — | — | SMR, gov/insider backing |
122
+ | Centrus Energy | LEU | ~$1.2B | ~11 | ~22% | Uranium, analyst upgrades |
123
+ | BWX Technologies| BWXT | ~$8.6B | ~25 | ~36% | Defense, U.S. Navy, gov’t |
124
+ | GTI Energy | GTR | ~$40M (AUD) | — | — | Uranium, U.S. expansion |
125
+ | Atomic Canyon   | —      | Private      | —     | —     | AI SaaS, Diablo Canyon win  |
126
+ | Kairos Power | — | Private | — | — | Google SMR PPA |
127
+
128
+ *Data based on latest available annual/quarterly filings and estimates.*
129
+
130
+ ---
data/nuclear_energy_2025-06-03_1.md ADDED
@@ -0,0 +1,111 @@
1
+
2
+ > **Metrics**
3
+ > Topic: `nuclear energy`
4
+ > Articles Collected: `60`
5
+ > Generated: `2025-06-03 11:52`
6
+ >
7
+ # Nuclear Energy: Value Investing Focus – Week Ending June 2, 2025
8
+
9
+ ---
10
+ ## Intro: Market Context and Week Summary
11
+
12
+ Nuclear energy took center stage this week, driven by major executive moves in U.S. energy policy, heightened demand from AI/data centers, and investor/VC excitement about SMRs (small modular reactors). With Trump’s administration rolling out pro-nuclear executive orders and Europe/Asia accelerating new builds, public and private capital is steadily shifting back into nuclear plays. The macro environment is bullish: regulatory timelines are shortening, capital support is rising, and energy stability/cleanliness place nuclear above wind and solar in AI-focused grid conversations. On the ground: several companies (including Oklo, BWX Technologies, and Centrus) received analyst upgrades, utilities are racing to deploy SMRs, and nuclear-tech startups are pulling in fresh VC funds. Smart money is watching supply chains (uranium), next-gen reactors, and infrastructure/enabling tech for nuclear’s new "golden age."
13
+
14
+ ---
15
+
16
+ ## 1. Key Value Signals
17
+
18
+ - **Major U.S. Policy Shift**: New Trump administration executive orders to accelerate nuclear tech approval, reduce permitting times and support uranium supply chains ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/), [Forbes](https://www.forbes.com/sites/llewellynking/2025/05/31/nuclear-golden-age-huge-potential-stubborn-obstacles/)).
19
+ - **Big Tech Partnership Moves**: Google (and earlier, Meta) inking first agreements with small modular reactor developers ([The Guardian](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)).
20
+ - **Startups & VC Funding Rounds**: Atomic Canyon (AI for nuclear), Kairos Power, and others drawing new funding ([Axios](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium), [TechCrunch](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)).
21
+ - **Utility Action on SMRs**: TVA becomes first U.S. utility to seek permit for SMR, indicating a path for future orders ([Insurance Journal](https://www.insurancejournal.com/news/southeast/2025/05/27/825158.htm)).
22
+ - **Analyst Upgrades and Insider Buys**: Oklo (OKLO), Centrus Energy (LEU), and BWX Technologies (BWXT) upgraded ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)).
23
+ - **Strong Fundamental Tailwinds**:
24
+ - **Low P/E, Strong ROE/FCF**: Several nuclear/uranium plays trading below market P/E, generating high free cash flow, with secular macro demand increases.
25
+ - **Moats Emerging**: Through regulatory complexity, IP, and public-private partnerships.
26
+
27
+ ---
28
+
29
+ ## 2. Stocks or Startups to Watch
30
+
31
+ ### **Listed Stocks**
32
+
33
+ #### **Oklo (OKLO)**
34
+ - **Trigger:** Analyst upgrades post-Trump nuclear EO, SMR play, strong U.S. government support ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/))
35
+ - **Fundamentals:** Newly public (<6 months), early FMC/S-1 data. Moat: First SMR in pipeline, government/tech sector contracts.
36
+ - **Metric:** Expected SMR deployment, contract pipeline not yet priced in.
37
+
38
+ #### **Centrus Energy (LEU)**
39
+ - **Trigger:** Upgraded, uranium supply chain play; critical to new U.S. nuclear push ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/))
40
+ - **P/E:** ~13 ([Yahoo Finance](https://finance.yahoo.com/quote/LEU/))
41
+ - **ROE:** ~27%
42
+ - **Market Cap:** ~$650M
43
+ - **Comment:** Only U.S. uranium enrichment capability, crucial as U.S. looks to de-risk from Russia ([Mining.com.au](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)).
44
+
45
+ #### **BWX Technologies (BWXT)**
46
+ - **Trigger:** Major reactor supplier for U.S. Navy and DoE, among first to benefit from process acceleration ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)).
47
+ - **P/E:** ~24
48
+ - **ROE:** ~35%
49
+ - **Moat:** Navy sole-source positioning, R&D, U.S. government contracts.
50
+ - **Market Cap:** ~$10B
51
+
52
+ #### **NuScale Power (SMR)**
53
+ - **Trigger:** NRC has approved SMR design, clearing path for deployment ([Utility Dive](https://www.utilitydive.com/news/nrc-approves-nuscale-small-modular-reactor-smr/749538/))
54
+ - **Metric:** High short interest post-IPO, but new regulatory tailwinds. Watch for major contract wins.
55
+
56
+ #### **Paladin Energy (PDN.AX)**
57
+ - **Trigger:** Making moves at Patterson Lake as uranium demand surges with U.S. and global SMR build ([Mining.com.au](https://mining.com.au/paladin-proceeds-at-patterson-lake/)).
58
+ - **Comment:** Undervalued relative to long-term uranium price upcycle.
59
+
60
+ ### **Startups & Undercapitalized Opportunities**
61
+
62
+ - **Atomic Canyon**: AI-powered B2B software for nuclear industry. Raised $7M seed led by Energy Impact Partners (backers of several energy unicorns). Aim: “ChatGPT for nuclear” ([TechCrunch](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/))
63
+
64
+ - **Kairos Power**: Leading small modular reactor startup; Google is the first customer for its future SMR energy (direct-purchase PPA) ([The Guardian](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power))
65
+
66
+ - **Type One Energy**: Fusion startup, just completed formal initial design review ([Power Magazine](https://www.powermag.com/avangrid-investing-41-million-to-rebuild-ny-grid-infrastructure/)).
67
+
68
+ ---
69
+
70
+ ## 3. What Smart Money Might Be Acting On
71
+
72
+ - **Venture/Institutional**: Top-tier VCs (Energy Impact Partners, Plug and Play, Tower Research) making preemptive moves into enabling tech/software (e.g., Atomic Canyon).
73
+ - **Corporate Power Users (Big Tech)**: Google, Meta inking deals with SMR startups—future demand signal for new nuclear ([The Guardian](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)).
74
+ - **Analyst Coverage/Upgrades**: William Blair’s initiation on OKLO, LEU, and BWXT signals Wall Street is waking up to regulatory + macro catalysts ([Investor's Business Daily](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)).
75
+ - **Utilities/State Action**: TVA and Texas moving to lead SMR deployment and streamline permitting—possible template for state-federal partnerships ([Insurance Journal](https://www.insurancejournal.com/news/southeast/2025/05/27/825158.htm), [GovTech](https://www.govtech.com/products/texas-senate-passes-350m-grant-program-for-nuclear-power)).
76
+ - **Insider-Led Companies**: Centrus Energy (LEU, ex-government insiders, U.S.-centric contracts), Oklo (deep government, tech ecosystem relationships).
77
+
78
+ ---
79
+
80
+ ## 4. References/Sources
81
+
82
+ - [Forbes - U.S. must double down on nuclear](https://www.forbes.com/sites/billfrist/2025/05/29/powering-the-future-why-america-must-double-down-on-nuclear-energy/)
83
+ - [Forbes - Data Center Energy Wars](https://www.forbes.com/sites/ianpalmer/2025/05/27/gas-nuclear-renewables-battle-over-power-for-metas-new-data-center/)
84
+ - [The Guardian - Tech firms buy SMR power](https://www.theguardian.com/environment/2025/jun/01/tide-turning-europe-beyond-favour-nuclear-power)
85
+ - [Investor's Business Daily - Nuclear stocks upgraded](https://www.investors.com/news/trump-executive-orders-fuel-nuclear-stocks-upgrade-stock-market/)
86
+ - [Axios - Atomic Canyon B2B seed](https://www.axios.com/pro/all-deals/2025/05/28/first-look-pro-rata-premium)
87
+ - [TechCrunch - Atomic Canyon profile](https://techcrunch.com/2025/05/28/atomic-canyon-wants-to-be-chatgpt-for-the-nuclear-industry/)
88
+ - [Insurance Journal - TVA SMR permit](https://www.insurancejournal.com/news/southeast/2025/05/27/825158.htm)
89
+ - [Utility Dive – NRC approves NuScale SMR design](https://www.utilitydive.com/news/nrc-approves-nuscale-small-modular-reactor-smr/749538/)
90
+ - [Mining.com.au – Centrus/Paladin/uranium momentum](https://mining.com.au/trumps-nuclear-push-ignites-uranium-buzz/)
91
+ - [Yahoo Finance – LEU Key Stats](https://finance.yahoo.com/quote/LEU/)
92
+
93
+ ---
94
+
95
+ ## 5. Investment Hypothesis
96
+
97
+ **Thesis:**
98
+ Recent regulatory and policy catalysts have created a structural tailwind for both incumbent and next-gen nuclear energy firms, particularly those exposed to SMRs, uranium refining, and critical enabling tech/software. The current market underappreciates the scale and allocation speed of coming capital inflows (from utilities, governments, and cloud and data-center majors). Valuations (esp. in uranium and contractors) remain attractive on a P/E and FCF basis compared to wind/solar.
99
+
100
+ - **Buy candidates:** Oklo (OKLO), Centrus (LEU), BWX Technologies (BWXT), Paladin (PDN.AX), NuScale (SMR)
101
+ - **Venture/early-exposure:** Consider gaining VC fund/PE exposure to emerging nuclear tech/software infrastructure (e.g., Atomic Canyon, Kairos Power).
102
+ - **Rationale:** U.S./global policy, increased AI power grid demand, and high barriers to entry combine for exceptional medium/long-term risk/reward—especially after this week’s “regime change” in sentiment and regulation.
103
+
104
+ **Monitor:**
105
+ New contract wins for SMR developers. U.S. uranium production and enrichment capacity (LEU). Expansion or new partnerships with tech/utility majors. Insider ownership trends and further analyst coverage for nuclear sector plays.
106
+
107
+ ---
108
+
109
+ ### Overall: This week’s news offers a clear “green light” for value investors in nuclear, particularly those seeking both deep value (LEU, BWXT) and long-tail growth via platform/SMR innovators (OKLO, Kairos, NuScale). U.S. government and major tech-firm endorsement serves as powerful affirmation for the sector’s re-rating.
110
+
111
+ ---
data/nuclear_energy_2025-07-02.md ADDED
@@ -0,0 +1,133 @@
1
+
2
+ > Topic: `nuclear energy`
3
+ > Articles Collected: `133`
4
+ > Generated: `2025-07-02 20:18`
5
+ >
6
+ # Nuclear Energy Weekly Value Investing Memo
7
+ **Week of July 1, 2025**
8
+
9
+ ---
10
+
11
+ ### **Market Sentiment & Trends**
12
+ This week’s news reconfirms nuclear energy’s rising status as both a grid-reliability solution and a strategic power source for tech and industrial growth. Demand drivers include:
13
+ - Growing AI/data center needs (Google, Microsoft, Amazon heavily engaged)
14
+ - Policy tailwinds and new US DOE initiatives
15
+ - New partnerships and investments from leading tech and engineering firms
16
+ - Heightened urgency, both industrially and politically, for next-gen nuclear and advanced enrichment.
17
+
18
+ The overall sentiment is incrementally positive: there’s powerful momentum for nuclear expansion (especially advanced/small modular/fusion), but major regulatory, funding, and execution risks remain.
19
+
20
+ ---
21
+
22
+ ## 1. **Key Value Signals**
23
+
24
+ - **Big Tech Putting Capital to Work**: Google commits to buying electricity from both *fusion* (Commonwealth Fusion Systems) and *fission* (Kairos Power—an SMR startup), signaling a long-term offtake demand for clean nuclear output. These deals, while years out, anchor real business models and future cash flows in an industry where certainty has been rare.
25
+
26
+ - **DOE Fast-Tracks Advanced Nuclear**: The US Department of Energy (DOE) launched a pilot program to authorize *private* test reactors—removing a longstanding barrier for early-stage and test deployments. This regulatory facilitation could accelerate revenue opportunities for startups.
27
+
28
+ - **AI Meets Nuclear Construction**: Palantir—a leader in data analytics—announced its software will drive efficiency in reactor construction (with “The Nuclear Company”), signaling an ecosystem of digital infrastructure forming around new builds.
29
+
30
+ - **Strategic Collaborations**: Oklo (recent SPAC, high-profile leadership) and Bill Gates’ TerraPower signed a partnership around domestic HALEU enrichment—critical for next-generation reactors and a US supply chain play.
31
+
32
+ - **Major Fusion Funding**: Westinghouse and ITER sign a $180M contract to push fusion technology, while global fusion market size forecasts surge.
33
+
34
+ - **IPO and Recent SPAC Activity**: Oklo’s public listing, ongoing chatter around SMR startups seeking either funding or public exits.
35
+
36
+ ---
37
+
38
+ ## 2. **Stocks or Startups to Watch**
39
+
40
+ **A. Public/Recent IPO & Small Cap Opportunities**
41
+ - **Oklo (NYSE: OKLO)**
42
+ - **Profile**: Recent SPAC debut; backed by substantial leadership and Bill Gates’ circle via TerraPower collaboration.
43
+ - **Signals**: Strategic partnerships, domestic enrichment angle, close alignment with DOE pilot regulatory streamlining.
44
+ - **Check**: Valuation (historically rich for early-stage nuclear), business execution, and regulatory milestones.
45
+
46
+ - **Kairos Power (private, but IPO/speculation possible)**
47
+ - **Profile**: Small modular reactor company. Google offtake deal is a significant vote of confidence.
48
+ - **Signals**: Market validation, long-term revenue anchor (if plant comes online).
49
+
50
+ - **Commonwealth Fusion Systems (private)**
51
+ - **Profile**: Leading fusion startup; Google as an offtaker/investor.
52
+ - **Signals**: Earliest in its lifecycle, but with elite backing. Watch for pre-IPO funding rounds and cap table changes.
53
+
54
+ **B. Established, Undervalued Nuclear Plays (Check Valuation/Fundamentals)**
55
+ - **BWX Technologies (NYSE: BWXT)**
56
+ - **Profile**: Established supplier for nuclear reactors and specialized components.
57
+ - **Moat**: Deep US government/defense contracts, emerging advanced reactor supply role.
58
+ - **Valuation**: P/E ratio tends to be market-comparable, but free cash flow strong and recurring revenue profile.
59
+ - **Signal**: Exposure to multiple advanced reactor programs, SMR rollout, and robust political support.
60
+
61
+ - **Centrus Energy (NYSEMKT: LEU)**
62
+ - **Profile**: Only US public company with commercial uranium enrichment capability—potential HALEU winner.
63
+ - **Signals**: Vital for fueling advanced reactors; highly levered to new DOE policies.
64
+ - **Risks**: Small cap, volatile, but high convexity if advanced nuclear takes off in '26+.
65
+
66
+ **C. Infrastructure, EPC, and Software**
67
+ - **Palantir Technologies (NYSE: PLTR)**
68
+ - **Profile**: Now branching into nuclear with specialized construction/efficiency software.
69
+ - **Signal**: Long-term, stickier defense/critical infrastructure business.
70
+
71
+ ---
72
+
73
+ ## 3. **What Smart Money Might Be Acting On**
74
+
75
+ - **Pre-emptive Strategic Investment**: Major techs (Google especially) are locking in low-carbon electricity contracts before physical infrastructure is built. Early investor entry into fusion/SMR supply chains could offer “picks & shovels” asymmetry.
76
+
77
+ - **Pivot to Domestic Supply Chain**: Oklo/TerraPower collaboration for HALEU enrichment directly addresses “made in America” energy/defense policy. This is the tip of a deglobalization and re-onshoring trend—any US enrichment or SMR component supplier could be in play.
78
+
79
+ - **Software/Services Layer**: The nuclear restart will bring new opportunities for “enabling” firms: EPC (AECOM, AtkinsRéalis, Arup), digital twins/AI (Palantir), and regulatory facilitators.
80
+
81
+ - **Advanced Reactor “First Movers”**: Policy support (DOE program) will favor companies close to deployment/breakthrough—those that can move from pilot to cash generation by 2026-2030. Early capital and regulatory champions could see premium returns.
82
+
83
+ ---
84
+
85
+ ## 4. **References**
86
+
87
+ - [Google’s Data Center Bets — TechCrunch](https://techcrunch.com/2025/07/01/googles-data-center-energy-use-doubled-in-four-years/)
88
+ - [US DOE Pilot Program — POWER Magazine](https://www.powermag.com/doe-pilot-program-targets-three-nuclear-test-reactors-for-2026-criticality-under-department-authorization/)
89
+ - [Palantir and Nuclear — POWER Magazine](https://www.powermag.com/groups-partnering-to-develop-ai-software-to-speed-nuclear-reactor-construction/)
90
+ - [Oklo/TerraPower/HALEU — Oil & Gas 360](https://www.oilandgas360.com/oklo-enters-strategic-collaborations-with-hexium-and-terrapower-to-launch-new-pathway-for-domestic-haleu-enrichment/)
91
+ - [Westinghouse/ITER Contract — POWER Magazine](https://www.powermag.com/westinghouse-iter-sign-180-million-contract-to-advance-nuclear-fusion/)
92
+ - [Fusion Market Outlook — Precedence Research](https://www.precedenceresearch.com/fusion-energy-market)
93
+ - [BWX Technologies (BWXT) — Investor Relations](https://www.bwxt.com/)
94
+
95
+ ---
96
+
97
+ ## 5. **Investment Hypothesis**
98
+
99
+ **Thesis**: The convergence of policy, technology (AI/data center demand), and strategic investment from leading corporates is catalyzing a new nuclear buildout cycle—especially in the US. *First-mover* advanced fission and fusion startups, US-centric enrichment supply, and key enabling technologies (digital/twin/AI/construction) stand to generate outsize returns, particularly ahead of confirmed revenue streams in the early 2030s.
100
+
101
+ - **Core Bets**:
102
+ - **Oklo** — if the price corrects, it offers a uniquely exposed pure play on the regulatory shift and DOE pilot program.
103
+ - **Centrus Energy** — levered, high-risk/high-reward play on domestic HALEU enrichment.
104
+ - **BWX Technologies** — lower-risk, steady exposure to SMR and advanced builds, and possible defense tailwinds.
105
+
106
+ - **Venture/Aggressive**:
107
+ - Track private rounds (Commonwealth Fusion, Kairos Power); watch for IPO or secondary liquidity events.
108
+ - Monitor “picks and shovels” suppliers (engineering, digital, sensing, permitting).
109
+
110
+ - **Catalysts**:
111
+ - DOE pilot selections and project starts (late 2025/2026).
112
+ - Google/Microsoft/other tech-driven PPAs or partnerships.
113
+ - US and UK regulatory acceleration or major political support.
114
+
115
+ **Risks**: Execution slippage, cost overruns, regulatory reversals, or overhyped/illiquid microcaps. Fusion commercial viability remains >5-7 years out.
116
+
117
+ ---
118
+
119
+ # **Summary Table**
120
+
121
+ | Company | Ticker | Opportunity | Moat/Signal | Notes |
122
+ |------------------------|--------|------------------------|-----------------------------------|--------------------------------------------|
123
+ | Oklo | OKLO | Early pure play SMR | DOE pilot, TerraPower partnership | SPAC, recent, monitor valuation carefully |
124
+ | Centrus Energy | LEU | HALEU enrichment | Only US-capable, DOE contracts | High volatility |
125
+ | BWX Technologies | BWXT | Established supplier | Govt defense, recurring revenue | Steady, strong FCF & fundamentals |
126
+ | Commonwealth Fusion | – | Fusion, Google backing | Tech, strategic capital | Private, pre-IPO/2nd round watching |
127
+ | Kairos Power | – | SMR, Google offtake | Major tech validation | Private, track for IPO |
128
+ | Palantir Technologies | PLTR | Nuclear AI/software | 1st big software entrant | Not a pure play, watch ecosystem effects |
129
+
130
+ ---
131
+
132
+ ## **Bottom Line:**
133
+ *The investable landscape for nuclear is evolving rapidly—value investors should focus on companies bridging policy tailwind into real commercial assets, with an eye for US-centric supply, strategic contracts, and digital enablement of an emerging nuclear buildout cycle. Small/underfunded public names could offer asymmetric re-rating as the cycle unfolds.*
data/nuclear_energy_2025-07-04.md ADDED
@@ -0,0 +1,117 @@
1
+
2
+ > Topic: `Nuclear energy`
3
+ > Articles Collected: `150`
4
+ > Generated: `2025-07-04 13:55`
5
+ >
6
+ # Nuclear Energy: Value-Investor Weekly Memo
7
+ **Week of June 30 – July 7, 2025**
8
+
9
+ ---
10
+
11
+ ## Executive Summary: Sentiment & Market Trends
12
+
13
+ This week, nuclear energy remains at the center of global and U.S. energy policy debates, buoyed by both political tailwinds (GOP-led support in legislation, state-level deployment pushes) and rising demand from AI/data center infrastructure. Nuclear is also strategically reemerging as the “clean firm” power of choice as renewables face policy setbacks, intermittency challenges, and grid reliability strains. Major tech companies and select startup activity point to accelerations in both fission (SMRs) and fusion, with corporate and government actors signaling capital and operational shifts toward advanced nuclear solutions.
14
+
15
+ Market sentiment appears mildly positive for established names but remains neutral for the broader sector. Early-stage deal flow and new executive moves hint at undervalued opportunities in uranium miners, SMR developers, and next-gen reactor supply chains, all backstopped by robust macro trends.
16
+
17
+ ---
18
+
19
+ ## 1. Key Value Signals
20
+
21
+ - **Public-Private Partnerships & Policy Tailwinds**
22
+ - New York’s governor directs pursuit of at least 1 GW of new nuclear (possible “fleet-style” deployments), signifying state-level commitment.
23
+ - GOP legislation weakens renewables but retains and even enhances support for nuclear/geothermal—improving medium-term earning prospects for nuclear-exposed businesses.
24
+ - **Tech Giant Commitments**
25
+ - Google commits to buying power from Commonwealth Fusion Systems (fusion) and from Kairos Power (SMRs/fission), underscoring long-term belief in and potential floor demand for advanced nuclear power.
26
+ - **M&A / Executive Movement**
27
+ - Ur-Energy (URG) names Matthew Gili (ex-Cameco, Energy Fuels) as President; strong management pedigree in uranium mining suggests focus on operational ramp-up and credibility for growth.
28
+ - **Private Funding & Industrial Partnerships**
29
+ - Westinghouse-ITER $180M fusion contract advances commercial pathways for fusion.
30
+ - Palantir partners with The Nuclear Company for AI deployment in nuclear construction, potentially de-risking timelines and cost overruns—key bottlenecks for new plants.
31
+ - **Uranium Financing**
32
+ - Energy Fuels (NYSE: UUUU) launches $300M ATM share offering for growth and possibly M&A, indicating possible scale-up action or acquisition-driven value.
33
+
34
+ ---
35
+
36
+ ## 2. Stocks or Startups to Watch
37
+
38
+ ### Undervalued Small Caps / Startups
39
+
40
+ - **Ur-Energy (URG)**
41
+ - **Sector**: Uranium production/mining
42
+ - **Signals**: New CEO with pedigree, North American supply play; potential for insider or institutional accumulation.
43
+ - **Fundamentals**: Historically low P/B and P/E vs. sector; improving cash flow as uranium prices trend higher.
44
+ - **Energy Fuels (UUUU)**
45
+ - **Sector**: Uranium/rare earths
46
+ - **Signals**: ATM share offering—could precede an operational expansion, M&A, or balance sheet fortification.
47
+ - **Moat**: Vertical integration and North American production base; tailwinds from potential U.S. uranium supply mandates.
48
+ - **Kairos Power**
49
+ - **Sector**: Small Modular Reactor (SMR) developer
50
+ - **Signals**: Google is a committed off-taker (500 MW); not public but watch for IPO or private rounds.
51
+ - **Moat**: Proprietary reactor and fuel tech, first-mover commercial projects.
52
+ - **Commonwealth Fusion Systems (private)**
53
+ - **Sector**: Fusion
54
+ - **Signals**: Google investing + off-take for 200 MW; implies robust institutional backing, possible pre-IPO unicorn.
55
+ - **Moat**: Leading IP/patent portfolio in commercial fusion.
56
+ - **Floating Nuclear Consortia (Europe/Mediterranean)**
57
+ - **Sector**: Maritime nuclear
58
+ - **Signals**: New industry consortium for floating plants; regulatory tailwinds in Europe; riskier but paradigm-shifting.
59
+
60
+ ### Large-Cap Defensive/Incumbent Names
61
+
62
+ - **Westinghouse (private, but watch via Brookfield Asset Management/partners)**
63
+ - **Signals**: $180M fusion contract + global SMR tenders.
64
+ - **Moat**: Deep IP/patents, established utility relationships.
65
+
66
+ #### Emerging Themes
67
+ - SMEs/startups deploying AI to compress reactor construction timelines (e.g., The Nuclear Company + Palantir).
68
+ - Uranium spot market dislocations, supply security, and U.S./Canadian production uptrend.
69
+
70
+ ---
71
+
72
+ ## 3. What Smart Money Might Be Acting On
73
+
74
+ ### Institutional Moves and VC Flows
75
+
76
+ - **Tech Company Off-Take Agreements**: Google’s long-dated power purchase agreements (PPAs) for nuclear fusion and SMRs indicate that large buyers are locking in future clean firm power, giving runway and de-risking revenue for emerging projects.
77
+ - **Leadership Talent Migration**: Appointment of high-profile operators (e.g., Matthew Gili at URG) often precedes capital flows and operational improvement.
78
+ - **Private/VC Investment**: Ongoing private fundraising in fusion (CFS/publicized; others less visible) and SMR space—potential for pre-IPO access or PIPE deals.
79
+ - **Policy-driven Lifts**: Funds with a value/cyclical tilt may be accumulating uranium miners and established SMR suppliers, expecting U.S. or European state-driven demand and pricing power.
80
+
81
+ ---
82
+
83
+ ## 4. References
84
+
85
+ - [Insider Monkey: Ur-Energy appoints Matthew Gili](https://www.insidermonkey.com/blog/ur-energy-urg-names-matthew-gili-as-president-to-support-growth-strategy-1562642/)
86
+ - [TechCrunch: Google’s data center energy use doubles; commits to SMRs & Fusion](https://techcrunch.com/2025/07/01/googles-data-center-energy-use-doubled-in-four-years/)
87
+ - [Newsweek: Google bets on Nuclear Fusion, Commonwealth Fusion Systems](https://www.newsweek.com/google-bets-nuclear-fusion-next-generation-clean-power-2091877)
88
+ - [POWER Magazine: Westinghouse & ITER fusion contract](https://www.powermag.com/westinghouse-iter-sign-180-million-contract-to-advance-nuclear-fusion/)
89
+ - [Utility Dive: NY Gov. Hochul nuclear push](https://www.utilitydive.com/news/new-york-gov-hochul-hints-at-fleet-style-approach-to-nuclear-deployments/751838/)
90
+ - [Insider Monkey: Energy Fuels ATM offering](https://www.insidermonkey.com/blog/energy-fuels-uuuu-launches-300-million-atm-share-offering-program-1562647/)
91
+ - [Marine Link: Industry consortium assesses floating nuclear](https://www.marinelink.com/news/industry-consortium-asses-floating-527616)
92
+ - [The Verge, Sky News, NPR, CleanTechnica] (multiple for macro/policy context)
93
+
94
+ ---
95
+
96
+ ## 5. Investment Hypothesis
97
+
98
+ Amid rising electricity demand from AI/data centers and the political marginalization of wind/solar, nuclear energy—particularly next-gen reactor developers, operationally leveraged uranium miners, and AI-enabled project managers—is set to benefit from both structural and cyclical forces. Near-term policy support, tech company PPA commitments, and tangible operational milestones (fusion contracts, executive talent upgrades) provide a fundamental backdrop for value investors.
99
+
100
+ **Thesis**: Select undervalued uranium miners (URG, UUUU) and actionable SMR/fusion-related plays with real partnerships or contracts (Kairos, CFS, Palantir’s nuclear construction software partners) are likely mispriced relative to long-term demand, the emergence of tech buyer power, and regulatory tailwinds. Watch for balance sheet improvement, insider activity, and capex deployment as future catalysts.
101
+
102
+ **Actionable Watchlist:**
103
+ - Ur-Energy (NYSE: URG) — ride management upgrade and uranium bull cycle
104
+ - Energy Fuels (NYSE: UUUU) — play on U.S. supply autonomy and balance sheet firepower
105
+ - Private: Kairos Power, Commonwealth Fusion Systems — monitor for IPO/news, pre-IPO funds
106
+ - Established supply chain: Westinghouse (via BAM, or tracking SMR contracts), Palantir’s nuclear ventures
107
+
108
+ ---
109
+
110
+ **Macroeconomic/Regulatory Context:**
111
+ - U.S. and European grid reliability and policy now lean “pro-nuclear” as renewables face political and technical hurdles.
112
+ - Tech-sector demand for bespoke clean, reliable baseload may outpace traditional grid growth, driving long-term PPA/contracting up for nuclear-adjacent firms.
113
+ - Early stage risk remains (especially fusion), but government cash, looser environmental reviews, and talent influx are de-risking the sector.
114
+
115
+ ---
116
+
117
+ **Discipline:** Accumulate on dips with a margin of safety; remain alert to policy reversals, cost overruns, and technology risk. Revisit on IPO news, federal incentive shifts, and real-world contract wins.
external/.DS_Store ADDED
Binary file (6.15 kB). View file
 
external/FinGPT/.github/FUNDING.yml ADDED
@@ -0,0 +1,12 @@
1
+ # These are supported funding model platforms
2
+
3
+ github: [BruceYanghy]
4
+ open_collective: # Replace with a single Open Collective username
5
+ ko_fi: # Replace with a single Ko-fi username
6
+ tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
7
+ community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
8
+ liberapay: # Replace with a single Liberapay username
9
+ issuehunt: # Replace with a single IssueHunt username
10
+ otechie: # Replace with a single Otechie username
11
+ lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
12
+ custom: ['paypal.me/Hongyang']
external/FinGPT/.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
external/FinGPT/.gitignore ADDED
@@ -0,0 +1,141 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+ .DS_Store
131
+ .idea/FinGPT.iml
132
+ *.xml
133
+
134
+ # Job scripts
135
+ fingpt/FinGPT_sentiment/instruct-FinGPT/run.sh
136
+ fingpt/FinGPT_sentiment/instruct-FinGPT/checkpoints
137
+ fingpt/FinGPT_sentiment/instruct-FinGPT/ds_results_all_10_v2_1.*
138
+ FinGPT_Training_LoRA_with_Chatglm2_6b_for_beginners.ipynb
139
+
140
+ # Benchmark data
141
+ fingpt/FinGPT_Benchmark/data/*/**
external/FinGPT/.gitpod.yml ADDED
@@ -0,0 +1,10 @@
1
+ # This configuration file was automatically generated by Gitpod.
2
+ # Please adjust to your needs (see https://www.gitpod.io/docs/introduction/learn-gitpod/gitpod-yaml)
3
+ # and commit this file to your remote git repository to share the goodness with others.
4
+
5
+ # Learn more from ready-to-use templates: https://www.gitpod.io/docs/introduction/getting-started/quickstart
6
+
7
+ tasks:
8
+ - init: pip install -r requirements.txt
9
+
10
+
external/FinGPT/.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
external/FinGPT/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,65 @@
1
+ # Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
6
+
7
+ ## Our Standards
8
+
9
+ Examples of behavior that contributes to creating a positive environment include:
10
+
11
+ - Using welcoming and inclusive language
12
+ - Being respectful of differing viewpoints and experiences
13
+ - Gracefully accepting constructive criticism
14
+ - Focusing on what is best for the community
15
+ - Showing empathy towards other community members
16
+
17
+ Examples of unacceptable behavior by participants include:
18
+
19
+ - The use of sexualized language or imagery and unwelcome sexual attention or advances
20
+ - Trolling, insulting/derogatory comments, and personal or political attacks
21
+ - Public or private harassment
22
+ - Publishing others' private information, such as a physical or electronic address, without explicit permission
23
+ - Other conduct that could reasonably be considered inappropriate in a professional setting
24
+
25
+ ## Our Responsibilities
26
+
27
+ We as project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28
+
29
+ We have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned with this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30
+
31
+ ## Scope
32
+
33
+ This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project email address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
34
+
35
+ ## Enforcement
36
+
37
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident.
38
+
39
+ ## Enforcement Guidelines
40
+
41
+ Community managers will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
42
+
43
+ ### 1. Correction
44
+
45
+ **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
46
+
47
+ **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.
48
+
49
+ ### 2. Warning
50
+
51
+ **Community Impact**: A violation through a single incident or series of actions.
52
+
53
+ **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.
54
+
55
+ ### 3. Temporary Ban
56
+
57
+ **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.
58
+
59
+ **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
60
+
61
+ ### 4. Permanent Ban
62
+
63
+ **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
64
+
65
+ **Consequence**: A permanent ban from any sort of public interaction within the community.
external/FinGPT/CONTRIBUTING.md ADDED
@@ -0,0 +1,68 @@
1
+ # FinGPT Contribution Guidelines 🚀
2
+
3
+ Welcome to the FinGPT project! We are thrilled to have you here 🌟. Your contributions are instrumental in shaping the intersection of finance and AI, making it even more amazing. 📈✨ Let's embark on this journey together.
4
+
5
+ ## Code of Conduct 🤝
6
+
7
+ Before diving in, please take a moment to review our Code of Conduct. It sets the tone for our community and emphasizes the importance of respect and inclusivity. [Read the Code of Conduct](CODE_OF_CONDUCT.md).
8
+
9
+ ## Contribution Types 🦠🚀📚
10
+
11
+ ### Bug Reports 🐞
12
+
13
+ If you encounter any bugs during your journey, don't fret! We have the Bug Busters ready to help. To report a bug, follow these steps:
14
+
15
+ 1. Check if the bug has already been reported in [GitHub Issues](https://github.com/AI4Finance-Foundation/FinGPT/issues).
16
+ 2. If it's a new bug, open a new issue with a concise description and provide detailed, step-by-step instructions to reproduce it.
17
+
18
+ ### Feature Requests 💡
19
+
20
+ Do you have visionary ideas that could elevate FinGPT? Share them with us! When submitting a feature request, be sure to include:
21
+
22
+ 1. A clear and vivid description of the feature you envision.
23
+ 2. Discuss the impact and potential benefits.
24
+
25
+ ### Documentation 📖
26
+
27
+ For those with a penchant for words and an eye for detail, consider contributing to our documentation. You can make the documentation more enlightening for everyone. 🧙📜
28
+
29
+ ### Code Contributions 💻
30
+
31
+ Calling all AI heroes and wizards! You are the secret sauce behind the FinGPT project. To contribute code and save the financial world:
32
+
33
+ 1. **Fork the Repository**: Click the "Fork" button on the top right of the repository's page. This creates your own copy of the project.
34
+
35
+ 2. **Clone your Fork**: In your terminal, use the following command to clone your fork to your local machine:
36
+
37
+ ```bash
38
+ git clone https://github.com/YourUsername/FinGPT.git
39
+ ```
40
+
41
+ 3. **Create a New Branch**: Make a new branch for your adventures. This helps keep the main codebase clean:
42
+
43
+ ```bash
44
+ git checkout -b your-feature-branch
45
+ ```
46
+
47
+ 4. **Work Your Magic**: Implement your code or changes.
48
+
49
+ 5. **Commit and Push**: Use these commands to commit your changes and push them to your fork:
50
+
51
+ ```bash
52
+ git commit -m "Your commit message"
53
+ git push origin your-feature-branch
54
+ ```
55
+
56
+ 6. **Create a Pull Request**: Go to the original FinGPT repository and click "New Pull Request." Select your branch, write a description, and submit.
57
+
58
+ ## Seeking Assistance ❓🙋‍♀️
59
+
60
+ If you find yourself stuck or have questions, remember that our support team is your sidekick. Don't hesitate to reach out. We are here to guide you through the process and provide any necessary assistance.
61
+
62
+ ## Getting Started 🚀🚀
63
+
64
+ Are you ready to make a mark on the FinGPT project? Grab your cape and join us in our mission to make finance and AI even more incredible. Your contributions are the magic that fuels our journey.
65
+
66
+ 🔗 [FinGPT GitHub Repository](https://github.com/AI4Finance-Foundation/FinGPT)
67
+
68
+ ### May your contributions be as amazing as you are! 🌌🚀
external/FinGPT/FinGPT_ Training with LoRA and Meta-Llama-3-8B.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
external/FinGPT/FinGPT_Inference_Llama2_13B_falcon_7B_for_Beginners.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
external/FinGPT/FinGPT_Training_LoRA_with_ChatGLM2_6B_for_Beginners_v2-2.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
external/FinGPT/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 AI4Finance Foundation Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
external/FinGPT/MANIFEST.in ADDED
@@ -0,0 +1 @@
1
+ include fingpt/FinGPT_Benchmark/benchmarks/sentiment_templates.txt
external/FinGPT/README.md ADDED
@@ -0,0 +1,384 @@
1
+ <div align="center">
2
+ <img align="center" width="30%" alt="image" src="https://github.com/AI4Finance-Foundation/FinGPT/assets/31713746/e0371951-1ce1-488e-aa25-0992dafcc139">
3
+ </div>
4
+
5
+ # FinGPT: Open-Source Financial Large Language Models
6
+ [![Downloads](https://static.pepy.tech/badge/fingpt)](https://pepy.tech/project/fingpt)
7
+ [![Downloads](https://static.pepy.tech/badge/fingpt/week)](https://pepy.tech/project/fingpt)
8
+ [![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/)
9
+ [![PyPI](https://img.shields.io/pypi/v/fingpt.svg)](https://pypi.org/project/fingpt/)
10
+ ![License](https://img.shields.io/github/license/AI4Finance-Foundation/fingpt.svg?color=brightgreen)
11
+ ![](https://img.shields.io/github/issues-raw/AI4Finance-Foundation/fingpt?label=Issues)
12
+ ![](https://img.shields.io/github/issues-closed-raw/AI4Finance-Foundation/fingpt?label=Closed+Issues)
13
+ ![](https://img.shields.io/github/issues-pr-raw/AI4Finance-Foundation/fingpt?label=Open+PRs)
14
+ ![](https://img.shields.io/github/issues-pr-closed-raw/AI4Finance-Foundation/fingpt?label=Closed+PRs)
15
+
16
+ <div align="center">
17
+ <img align="center" src=figs/logo_transparent_background.png width="40%"/>
18
+ </div>
19
+
20
+ Let us not expect Wall Street to open-source LLMs or open up APIs, given FinTech institutions' internal regulations and policies.
21
+
22
+ [Blueprint of FinGPT](https://arxiv.org/abs/2306.06031)
23
+
24
+ <https://huggingface.co/FinGPT>
25
+
26
+ [![](https://dcbadge.vercel.app/api/server/trsr8SXpW5)](https://discord.gg/trsr8SXpW5)
27
+
28
+ ![Visitors](https://api.visitorbadge.io/api/VisitorHit?user=AI4Finance-Foundation&repo=FinGPT&countColor=%23B17A)
29
+
30
+
31
+ ## What's New:
32
+ - [Model Release] Nov, 2023: We release [FinGPT-Forecaster](https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT_Forecaster)! 🔥[Demo](https://huggingface.co/spaces/FinGPT/FinGPT-Forecaster), [Medium Blog](https://medium.datadriveninvestor.com/introducing-fingpt-forecaster-the-future-of-robo-advisory-services-50add34e3d3c) & [Model](https://huggingface.co/FinGPT/fingpt-forecaster_dow30_llama2-7b_lora) are available on Huggingface🤗!
33
+ - [Paper Acceptance] Oct, 2023: ["FinGPT: Instruction Tuning Benchmark for Open-Source Large Language Models in Financial Datasets"](https://arxiv.org/abs/2310.04793) is accepted🎉 by [Instruction Workshop](https://an-instructive-workshop.github.io/) @ NeurIPS 2023
34
+ - [Paper Acceptance] Oct, 2023: ["FinGPT: Democratizing Internet-scale Data for Financial Large Language Models"](https://arxiv.org/abs/2307.10485) is accepted🎉 by [Instruction Workshop](https://an-instructive-workshop.github.io/) @ NeurIPS 2023
35
+ - [Model Release] Oct, 2023: We release the [financial multi-task LLMs](https://huggingface.co/FinGPT) 🔥 produced when evaluating base-LLMs on [FinGPT-Benchmark](https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT_Benchmark)
36
+ - [Paper Acceptance] Sep, 2023: ["Enhancing Financial Sentiment Analysis via Retrieval Augmented Large Language Models"](https://arxiv.org/abs/2310.04027) is accepted🎉 by [ACM International Conference on AI in Finance (ICAIF-23)](https://ai-finance.org/icaif-23-accepted-papers/)
37
+ - [Model Release] Aug, 2023: We release the [financial sentiment analysis model](https://huggingface.co/FinGPT/fingpt-sentiment_llama2-13b_lora) 🔥
38
+ - [Paper Acceptance] Jul, 2023: ["Instruct-FinGPT: Financial Sentiment Analysis by Instruction Tuning of General-Purpose Large Language Models"](https://arxiv.org/abs/2306.12659) is accepted🎉 by [FinLLM 2023](https://finllm.github.io/workshop/#/fcb)@IJCAI 2023
39
+ - [Paper Acceptance] Jul, 2023: ["FinGPT: Open-Source Financial Large Language Models"](https://arxiv.org/abs/2306.06031) is accepted🎉 by [FinLLM 2023](https://finllm.github.io/workshop/#/fcb)@IJCAI 2023
40
+ - [Medium Blog] Jun 2023: [FinGPT: Powering the Future of Finance with 20 Cutting-Edge Applications](https://medium.datadriveninvestor.com/fingpt-powering-the-future-of-finance-with-20-cutting-edge-applications-7c4d082ad3d8)
41
+
42
+ ## Why FinGPT?
43
+
44
+ 1). Finance is highly dynamic. [BloombergGPT](https://arxiv.org/abs/2303.17564) trained an LLM on a mixture of finance data and general-purpose data, which took about 53 days at a cost of around **$3M**. It is costly to retrain an LLM like BloombergGPT every month or every week, so lightweight adaptation is highly favorable. FinGPT can be fine-tuned swiftly to incorporate new data (the cost falls significantly, to less than **$300 per fine-tuning**).
45
+
46
+ 2). Democratizing Internet-scale financial data is critical, e.g., allowing timely updates of the model (monthly or weekly updates) using an automatic data curation pipeline. BloombergGPT has privileged data access and APIs, while FinGPT presents a more accessible alternative. It prioritizes lightweight adaptation, leveraging the best available open-source LLMs.
47
+
48
+ 3). The key technology is "RLHF (Reinforcement learning from human feedback)", which is missing in BloombergGPT. RLHF enables an LLM to learn individual preferences (risk-aversion level, investing habits, personalized robo-advisor, etc.), which is the "secret" ingredient of ChatGPT and GPT-4.
49
+
50
+
51
+ ### Milestone of AI Robo-Advisor: FinGPT-Forecaster
52
+
53
+ Try the latest released FinGPT-Forecaster demo at our [HuggingFace Space](https://huggingface.co/spaces/FinGPT/FinGPT-Forecaster)
54
+
55
+ The dataset for FinGPT-Forecaster: https://huggingface.co/datasets/FinGPT/fingpt-forecaster-dow30-202305-202405
56
+
57
+ ![demo_interface](fingpt/FinGPT_Forecaster/figs/interface.png)
58
+
59
+ Enter the following inputs:
60
+
61
+ 1) ticker symbol (e.g. AAPL, MSFT, NVDA)
62
+ 2) the day from which you want the prediction to happen (yyyy-mm-dd)
63
+ 3) the number of past weeks for which market news is retrieved
64
+ 4) whether to add the latest basic financials as additional information
65
+
66
+ Click Submit, and you'll receive a well-rounded analysis of the company and a prediction of next week's stock price movement!
67
+
68
+ For detailed and more customized implementation, please refer to [FinGPT-Forecaster](https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT_Forecaster)
69
+
70
+
71
+ ## FinGPT Demos:
72
+
73
+ ### Current State-of-the-arts for Financial Sentiment Analysis
74
+
75
+ * [FinGPT V3 (Updated on 10/12/2023)](./fingpt)
76
+
77
+ * What's new: **The best FinGPT for sentiment analysis that can be trained and run for inference on a single RTX 3090, performing even better than GPT-4 and fine-tuned ChatGPT.**
78
+
79
+ * The [FinGPT v3](https://huggingface.co/FinGPT/fingpt-sentiment_llama2-13b_lora) series are LLMs fine-tuned with the LoRA method on news and tweets sentiment analysis datasets, achieving the best scores on most financial sentiment analysis benchmarks at low cost.
80
+
81
+ * FinGPT v3.3 uses llama2-13b as its base model; FinGPT v3.2 uses llama2-7b; FinGPT v3.1 uses chatglm2-6b.
82
+
83
+ * Benchmark Results:
84
+
85
+ * | Weighted F1 | FPB | FiQA-SA | TFNS | NWGI | Devices | Time | Cost |
86
+ | ------------------------------------------------------------ | :-------: | :-------: | :-------: | :-------: | :----------------: | :---------: | :------------: |
87
+ | [FinGPT v3.3](https://huggingface.co/FinGPT/fingpt-sentiment_llama2-13b_lora)| **0.882** | 0.874 | **0.903** | **0.643** | 1 × RTX 3090 | 17.25 hours | $17.25 |
88
+ | FinGPT v3.2| 0.850 | 0.860 | 0.894 | 0.636 | 1 × A100 | 5.5 hours | $ 22.55 |
89
+ | FinGPT v3.1| 0.855 | 0.850 | 0.875 | 0.642 | 1 × A100 | 5.5 hours | $ 22.55 |
90
+ | FinGPT (8bit) | 0.855 | 0.847 | 0.879 | 0.632 | 1 × RTX 3090 | 6.47 hours | $ 6.47 |
91
+ | FinGPT (QLoRA) | 0.777 | 0.752 | 0.828 | 0.583 | 1 × RTX 3090 | 4.15 hours | $ 4.15 |
92
+ | OpenAI Fine-tune | 0.878 | **0.887** | 0.883 | - | - | - | - |
93
+ | GPT-4 | 0.833 | 0.630 | 0.808 | - | - | - | - |
94
+ | FinBERT | 0.880 | 0.596 | 0.733 | 0.538 | 4 × NVIDIA K80 GPU | - | - |
95
+ | Llama2-7B | 0.390 | 0.800 | 0.296 | 0.503 | 2048 × A100 | 21 days | $ 4.23 million |
96
+ | BloombergGPT | 0.511 | 0.751 | - | - | 512 × A100 | 53 days | $ 2.67 million |
97
+
98
+ **Cost per GPU hour.** For **A100 GPUs**, the AWS p4d.24xlarge instance, equipped with 8 A100 GPUs, is used as a benchmark to estimate costs; note that BloombergGPT also used p4d.24xlarge. As of July 11, 2023, the hourly rate for this instance stands at $32.773, so the estimated cost per GPU hour comes to $32.773 divided by 8, approximately **$4.10**, which we use as the reference unit price (1 GPU hour). **BloombergGPT estimated cost = 512 GPUs × 53 days × 24 hours = 651,264 GPU hours; at $4.10 per GPU hour, about $2,670,182.40**. For the **RTX 3090**, we assume a cost per hour of approximately **$1.00**, which is actually higher than rates available from platforms like vast.ai.
99
+
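+ The arithmetic behind these estimates can be reproduced in a few lines; this is a sketch using the figures quoted above, not live cloud prices:
+
+ ```python
+ # Reproduce the cost estimates above (quoted figures, not live prices).
+ a100_rate = 32.773 / 8            # p4d.24xlarge hourly rate split across 8 GPUs
+ gpu_hours = 512 * 53 * 24         # BloombergGPT: 512 A100s for 53 days
+ print(f"Cost per A100 GPU hour: ${a100_rate:.2f}")          # ~= $4.10
+ print(f"BloombergGPT estimate:  ${gpu_hours * 4.10:,.2f}")  # $2,670,182.40
+
+ rtx3090_rate = 1.00               # assumed $1.00/hour for an RTX 3090
+ print(f"FinGPT v3.3 estimate:   ${17.25 * rtx3090_rate:,.2f}")  # $17.25
+ ```
+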
100
+ * Reproduce the results by running the [benchmarks](./fingpt/FinGPT_Sentiment_Analysis_v3/benchmark/benchmarks.ipynb); a detailed tutorial is on the way.
101
+ * Finetune your own FinGPT v3 model with the LoRA method on a single RTX 3090 with this [notebook](./fingpt/FinGPT_Sentiment_Analysis_v3/training_8bit/train_Llama2_13B.ipynb) in 8bit or this [notebook](./fingpt/FinGPT_Sentiment_Analysis_v3/training_int4/train.ipynb) in int4 (QLoRA); a generic LoRA setup is sketched below.
102
+
103
+ * [FinGPT V1](./fingpt)
104
+ + **FinGPT by finetuning ChatGLM2 / Llama2 with LoRA with the market-labeled data for the Chinese Market**
105
+
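+ For orientation, here is a generic sketch of the kind of 8-bit LoRA setup the v3 notebooks use; the base model, rank, and target modules below are illustrative assumptions, not the notebooks' exact hyperparameters:
+
+ ```python
+ # Generic 8-bit LoRA fine-tuning setup (illustrative hyperparameters).
+ from transformers import AutoModelForCausalLM, BitsAndBytesConfig
+ from peft import LoraConfig, get_peft_model
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "meta-llama/Llama-2-13b-hf",                      # assumed base checkpoint
+     quantization_config=BitsAndBytesConfig(load_in_8bit=True),
+     device_map="auto",
+ )
+ lora_config = LoraConfig(
+     r=8, lora_alpha=32, lora_dropout=0.1,             # illustrative values
+     target_modules=["q_proj", "v_proj"],              # typical Llama attention targets
+     task_type="CAUSAL_LM",
+ )
+ model = get_peft_model(model, lora_config)
+ model.print_trainable_parameters()  # only the small adapters are trainable
+ ```
+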
106
+ ## Instruction Tuning Datasets and Models
107
+ The datasets we used, and the **multi-task financial LLM** models are available at <https://huggingface.co/FinGPT>
108
+
109
+ [Our Code](https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT_Benchmark)
110
+
111
+ | Datasets | Train Rows | Test Rows |Description |
112
+ | --------- | ----------------- | ------------ | --------------------- |
113
+ | [fingpt-sentiment-train](https://huggingface.co/datasets/FinGPT/fingpt-sentiment-train) | 76.8K | N/A|Sentiment Analysis Training Instructions |
114
+ | [fingpt-finred](https://huggingface.co/datasets/FinGPT/fingpt-finred)| 27.6k | 5.11k | Financial Relation Extraction Instructions |
115
+ | [fingpt-headline](https://huggingface.co/datasets/FinGPT/fingpt-headline) | 82.2k | 20.5k | Financial Headline Analysis Instructions|
116
+ | [fingpt-ner](https://huggingface.co/datasets/FinGPT/fingpt-ner) | 511 | 98 | Financial Named-Entity Recognition Instructions|
117
+ | [fingpt-fiqa_qa](https://huggingface.co/datasets/FinGPT/fingpt-fiqa_qa) | 17.1k | N/A | Financial Q&A Instructions|
118
+ | [fingpt-fineval](https://huggingface.co/datasets/FinGPT/fingpt-fineval) | 1.06k | 265 | Chinese Multiple-Choice Questions Instructions|
119
+
120
+ Multi-task financial LLM models:
121
+ ```python
122
+ demo_tasks = [
123
+ 'Financial Sentiment Analysis',
124
+ 'Financial Relation Extraction',
125
+ 'Financial Headline Classification',
126
+ 'Financial Named Entity Recognition',]
127
+ demo_inputs = [
128
+ "Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano",
129
+ "Apple Inc. Chief Executive Steve Jobs sought to soothe investor concerns about his health on Monday, saying his weight loss was caused by a hormone imbalance that is relatively simple to treat.",
130
+ 'gold trades in red in early trade; eyes near-term range at rs 28,300-28,600',
131
+ 'This LOAN AND SECURITY AGREEMENT dated January 27 , 1999 , between SILICON VALLEY BANK (" Bank "), a California - chartered bank with its principal place of business at 3003 Tasman Drive , Santa Clara , California 95054 with a loan production office located at 40 William St ., Ste .',]
132
+ demo_instructions = [
133
+ 'What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.',
134
+ 'Given phrases that describe the relationship between two words/phrases as options, extract the word/phrase pair and the corresponding lexical relationship between them from the input text. The output format should be "relation1: word1, word2; relation2: word3, word4". Options: product/material produced, manufacturer, distributed by, industry, position held, original broadcaster, owned by, founded by, distribution format, headquarters location, stock exchange, currency, parent organization, chief executive officer, director/manager, owner of, operator, member of, employer, chairperson, platform, subsidiary, legal form, publisher, developer, brand, business division, location of formation, creator.',
135
+ 'Does the news headline talk about price going up? Please choose an answer from {Yes/No}.',
136
+ 'Please extract entities and their types from the input sentence, entity types should be chosen from {person/organization/location}.',]
137
+ ```
138
+
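+ As a rough illustration (not the repository's official inference script), these demo lists could be fed to one of the multi-task LoRA models in the table below; the base checkpoint, prompt template, and generation settings here are assumptions:
+
+ ```python
+ # Minimal inference sketch: base model + multi-task LoRA adapter.
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel
+
+ base = "meta-llama/Llama-2-7b-hf"            # assumed base checkpoint
+ adapter = "FinGPT/fingpt-mt_llama2-7b_lora"  # multi-task LoRA weights
+
+ tokenizer = AutoTokenizer.from_pretrained(base)
+ model = AutoModelForCausalLM.from_pretrained(
+     base, torch_dtype=torch.float16, device_map="auto")
+ model = PeftModel.from_pretrained(model, adapter)
+ model.eval()
+
+ # Run the first demo task: financial sentiment analysis.
+ prompt = (f"Instruction: {demo_instructions[0]}\n"
+           f"Input: {demo_inputs[0]}\n"
+           f"Answer: ")
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+ with torch.no_grad():
+     out = model.generate(**inputs, max_new_tokens=16)
+ print(tokenizer.decode(out[0], skip_special_tokens=True))
+ ```
+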
139
+ | Models | Description | Function |
140
+ | --------- | --------------------- |---------------- |
141
+ | [fingpt-mt_llama2-7b_lora](https://huggingface.co/FinGPT/fingpt-mt_llama2-7b_lora)| Fine-tuned Llama2-7b model with LoRA | Multi-Task |
142
+ | [fingpt-mt_falcon-7b_lora](https://huggingface.co/FinGPT/fingpt-mt_falcon-7b_lora)| Fine-tuned falcon-7b model with LoRA | Multi-Task |
143
+ | [fingpt-mt_bloom-7b1_lora](https://huggingface.co/FinGPT/fingpt-mt_bloom-7b1_lora) | Fine-tuned bloom-7b1 model with LoRA | Multi-Task |
144
+ | [fingpt-mt_mpt-7b_lora](https://huggingface.co/FinGPT/fingpt-mt_mpt-7b_lora) | Fine-tuned mpt-7b model with LoRA | Multi-Task |
145
+ | [fingpt-mt_chatglm2-6b_lora](https://huggingface.co/FinGPT/fingpt-mt_chatglm2-6b_lora) | Fine-tuned chatglm-6b model with LoRA | Multi-Task |
146
+ | [fingpt-mt_qwen-7b_lora](https://huggingface.co/FinGPT/fingpt-mt_qwen-7b_lora) | Fine-tuned qwen-7b model with LoRA | Multi-Task |
147
+ | [fingpt-sentiment_llama2-13b_lora](https://huggingface.co/FinGPT/fingpt-sentiment_llama2-13b_lora) | Fine-tuned llama2-13b model with LoRA | Single-Task |
148
+ | [fingpt-forecaster_dow30_llama2-7b_lora](https://huggingface.co/FinGPT/fingpt-forecaster_dow30_llama2-7b_lora) | Fine-tuned llama2-7b model with LoRA | Single-Task |
149
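+
+ As a quick orientation, here is a minimal inference sketch (not an official snippet from this repo) showing how one of these adapters can be applied on top of its base model with PEFT, reusing the `demo_tasks`/`demo_inputs`/`demo_instructions` lists above; it assumes access to the gated `meta-llama/Llama-2-7b-hf` weights and a GPU.
+
+ ```python
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel
+
+ base = "meta-llama/Llama-2-7b-hf"  # gated; assumes you have been granted access
+ tokenizer = AutoTokenizer.from_pretrained(base)
+ model = AutoModelForCausalLM.from_pretrained(base, torch_dtype=torch.float16, device_map="auto")
+ model = PeftModel.from_pretrained(model, "FinGPT/fingpt-mt_llama2-7b_lora").eval()
+
+ # Same Instruction/Input/Answer prompt format used throughout the benchmark code.
+ prompt = f"Instruction: {demo_instructions[0]}\nInput: {demo_inputs[0]}\nAnswer: "
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+ with torch.no_grad():
+     out = model.generate(**inputs, max_new_tokens=16)
+ print(tokenizer.decode(out[0], skip_special_tokens=True).split("Answer: ")[-1])
+ ```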
+
+
+ ## Tutorials
+ [[Training] Beginner’s Guide to FinGPT: Training with LoRA and ChatGLM2–6B One Notebook, $10 GPU](https://byfintech.medium.com/beginners-guide-to-fingpt-training-with-lora-chatglm2-6b-9eb5ace7fe99)
+
+ ## Understanding FinGPT: An Educational Blog Series
+ + [FinGPT: Powering the Future of Finance with 20 Cutting-Edge Applications](https://medium.datadriveninvestor.com/fingpt-powering-the-future-of-finance-with-20-cutting-edge-applications-7c4d082ad3d8)
+ + [FinGPT I: Why We Built the First Open-Source Large Language Model for Finance](https://medium.datadriveninvestor.com/fingpt-i-why-we-built-the-first-open-source-large-language-model-for-finance-c01b5517ca)
+ + [FinGPT II: Cracking the Financial Sentiment Analysis Task Using Instruction Tuning of General-Purpose Large Language Models](https://medium.datadriveninvestor.com/fingpt-ii-cracking-the-financial-sentiment-analysis-task-using-instruction-tuning-of-3333bce428c4)
+
+
+ ## FinGPT Ecosystem
+ ### FinGPT embraces a full-stack framework for FinLLMs with five layers:
+ 1. **Data source layer**: This layer assures comprehensive market coverage, addressing the temporal sensitivity of financial data through real-time information capture.
+ 2. **Data engineering layer**: Primed for real-time NLP data processing, this layer tackles the inherent challenges of high temporal sensitivity and low signal-to-noise ratio in financial data.
+ 3. **LLMs layer**: Focusing on a range of fine-tuning methodologies such as LoRA, this layer mitigates the highly dynamic nature of financial data, ensuring the model’s relevance and accuracy.
+ 4. **Task layer**: This layer is responsible for executing fundamental tasks. These tasks serve as the benchmarks for performance evaluations and cross-comparisons in the realm of FinLLMs.
+ 5. **Application layer**: Showcasing practical applications and demos, this layer highlights the potential capabilities of FinGPT in the financial sector.
+
+ * FinGPT Framework: Open-Source Financial Large Language Models
+
+ <div align="center">
+ <img align="center" src="figs/FinGPT_framework_20240301.png">
+ </div>
+
+ * [FinGPT-RAG](https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT_RAG): We present a retrieval-augmented large language model framework designed specifically for financial sentiment analysis; it optimizes information depth and context through external knowledge retrieval, thereby ensuring nuanced predictions. A toy retrieve-then-predict sketch follows below.
+
+ <div align="center">
+ <img align="center" src="figs/FinGPT_RAG_framework.png">
+ </div>
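+
+ A deliberately tiny retrieve-then-predict sketch (not the FinGPT-RAG implementation): it ranks a small in-memory corpus by naive keyword overlap and prepends the best match as context for a sentiment prompt. The corpus and the scoring are illustrative placeholders only.
+
+ ```python
+ corpus = [  # stand-in for an external financial knowledge source
+     "Desano is a Chinese pharmaceutical manufacturer.",
+     "ViiV Healthcare is majority-owned by GSK.",
+ ]
+
+ def retrieve(query, docs, k=1):
+     # Rank documents by keyword overlap with the query (toy retriever).
+     q = set(query.lower().split())
+     return sorted(docs, key=lambda d: len(q & set(d.lower().split())), reverse=True)[:k]
+
+ def build_prompt(headline):
+     context = " ".join(retrieve(headline, corpus))
+     return (f"Context: {context}\n"
+             "Instruction: What is the sentiment of this news? "
+             "Please choose an answer from {negative/neutral/positive}.\n"
+             f"Input: {headline}\nAnswer: ")
+
+ print(build_prompt("Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano"))
+ ```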
+
+ * [FinGPT-FinNLP](https://github.com/AI4Finance-Foundation/FinNLP): FinNLP provides a playground for everyone interested in LLMs and NLP in finance. It offers full pipelines for LLM training and fine-tuning in the financial domain; the full architecture is shown in the picture below. Detailed code and an introduction can be found [here](https://github.com/AI4Finance-Foundation/FinNLP), or you may refer to the [wiki](https://ai4finance-foundation.github.io/FinNLP/).
+
+ <div align="center">
+ <img align="center" src="figs/FinGPT_FinNLP_data_source.png">
+ </div>
+
+ * [FinGPT-Benchmark](https://github.com/AI4Finance-Foundation/FinGPT/tree/master/fingpt/FinGPT_Benchmark): We introduce a novel instruction-tuning paradigm optimized for open-source Large Language Models (LLMs) in finance, enhancing their adaptability to diverse financial datasets while also facilitating cost-effective, systematic benchmarking across task-specific, multi-task, and zero-shot instruction-tuning tasks.
+
+ <div align="center">
+ <img align="center" src="figs/FinGPT_Benchmark_20231110.png">
+ </div>
+
+
+ ## Open-Source Base Models used in the LLMs layer of FinGPT
+ * Feel free to contribute more open-source base models tailored for various language-specific financial markets.
+
+ | Base Model | Pretraining Tokens | Context Length | Model Advantages | Model Size | Experiment Results | Applications |
+ | ---- | ---- | ---- | ---- | ---- | ---- | ---- |
+ | [Llama-2](https://github.com/facebookresearch/llama) | 2 Trillion | 4096 | Llama-2 excels on English-based market data | [llama-2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf) and [Llama-2-13b](https://huggingface.co/meta-llama/Llama-2-13b-hf) | Llama-2 consistently shows superior fine-tuning results | Financial Sentiment Analysis, Robo-Advisor |
+ | [Falcon](https://huggingface.co/tiiuae/falcon-7b) | 1,500B | 2048 | Maintains high-quality results while being more resource-efficient | [falcon-7b](https://huggingface.co/tiiuae/falcon-7b) | Good for English market data | Financial Sentiment Analysis |
+ | [MPT](https://github.com/mosaicml/llm-foundry) | 1T | 2048 | MPT models can be trained with high throughput efficiency and stable convergence | [mpt-7b](https://huggingface.co/mosaicml/mpt-7b) | Good for English market data | Financial Sentiment Analysis |
+ | [Bloom](https://github.com/bigscience-workshop/bigscience/tree/master/train/tr11-176B-ml#readme) | 366B | 2048 | World’s largest open multilingual language model | [bloom-7b1](https://huggingface.co/bigscience/bloom-7b1) | Good for English market data | Financial Sentiment Analysis |
+ | [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) | 1.4T | 32K | Exceptional capability for Chinese language expression | [chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b) | Shows prowess for Chinese market data | Financial Sentiment Analysis, Financial Report Summary |
+ | [Qwen](https://github.com/QwenLM/Qwen-7B) | 2.2T | 8K | Fast response and high accuracy | [qwen-7b](https://huggingface.co/tangger/Qwen-7B-Chat) | Effective for Chinese market data | Financial Sentiment Analysis |
+ | [InternLM](https://github.com/InternLM/InternLM) | 1.8T | 8K | Can flexibly and independently construct workflows | [internlm-7b](https://huggingface.co/internlm/internlm-7b) | Effective for Chinese market data | Financial Sentiment Analysis |
+
+ * Benchmark results for the above open-source base models on the financial sentiment analysis task, using the same instruction template for SFT (LoRA):
+
+ | Weighted F1/Acc | Llama2 | Falcon | MPT | Bloom | ChatGLM2 | Qwen | InternLM |
+ | --------- | ----------------- | ------------ | --------------------- | ---------------- | --------------- | ----------------- | ----------------- |
+ | [FPB](https://huggingface.co/datasets/financial_phrasebank) | 0.863/0.863 | 0.846/0.849 | **0.872**/**0.872** | 0.810/0.810 | 0.850/0.849 | 0.854/0.854 | 0.709/0.714 |
+ | [FiQA-SA](https://huggingface.co/datasets/pauri32/fiqa-2018) | **0.871**/0.855 | 0.840/0.811 | 0.863/0.844 | 0.771/0.753 | 0.864/**0.862** | 0.867/0.851 | 0.679/0.687 |
+ | [TFNS](https://huggingface.co/datasets/zeroshot/twitter-financial-news-sentiment) | 0.896/0.895 | 0.893/0.893 | **0.907**/**0.907** | 0.840/0.840 | 0.859/0.858 | 0.883/0.882 | 0.729/0.731 |
+ | [NWGI](https://huggingface.co/datasets/oliverwang15/news_with_gpt_instructions) | **0.649/0.651** | 0.636/0.638 | 0.640/0.641 | 0.573/0.574 | 0.619/0.629 | 0.638/0.643 | 0.498/0.503 |
+
+ ### All Thanks To Our Contributors:
+ <a href="https://github.com/AI4Finance-Foundation/FinGPT/graphs/contributors">
+ <img src="https://contrib.rocks/image?repo=AI4Finance-Foundation/FinGPT" />
+ </a>
+
+ ## News
+
+ + [Columbia Perspectives on ChatGPT](https://datascience.columbia.edu/news/2023/columbia-perspectives-on-chatgpt/?utm_source=sendinblue&utm_campaign=DSI%20Newsletter%20April%202023&utm_medium=email)
+ + [MIT Technology Review] [ChatGPT is about to revolutionize the economy. We need to decide what that looks like](https://www.technologyreview.com/2023/03/25/1070275/chatgpt-revolutionize-economy-decide-what-looks-like/)
+ + [BloombergGPT] [BloombergGPT: A Large Language Model for Finance](https://arxiv.org/abs/2303.17564)
+ + [Finextra] [ChatGPT and Bing AI to sit as panellists at fintech conference](https://www.finextra.com/newsarticle/41973/chatgpt-and-bing-ai-to-sit-as-panellists-at-fintech-conference)
+
+ ## ChatGPT at AI4Finance
+
+ + [YouTube video] [I Built a Trading Bot with ChatGPT](https://www.youtube.com/watch?v=fhBw3j_O9LE), combining ChatGPT and FinRL.
+ + [Hey, ChatGPT! Explain FinRL code to me!](https://medium.com/@ai4finance/hey-chatgpt-explain-finrl-code-to-me-6a91d612296f)
+
+ ## Introductory
+
+ + [Sparks of artificial general intelligence: Early experiments with GPT-4](https://arxiv.org/abs/2303.12712)
+ + [GPT-4] [GPT-4 Technical Report](https://arxiv.org/abs/2303.08774)
+ + [InstructGPT] [Training language models to follow instructions with human feedback](https://openreview.net/forum?id=TG8KACxEON) NeurIPS 2022.
+
+ [The Journey of Open AI GPT models](https://medium.com/walmartglobaltech/the-journey-of-open-ai-gpt-models-32d95b7b7fb2). GPT models explained: OpenAI's GPT-1, GPT-2, and GPT-3.
+
+ + [GPT-3] [Language models are few-shot learners](https://proceedings.neurips.cc/paper/2020/hash/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html) NeurIPS 2020.
+ + [GPT-2] [Language Models are Unsupervised Multitask Learners](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)
+ + [GPT-1] [Improving Language Understanding by Generative Pre-Training](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf)
+ + [Transformer] [Attention is All you Need](https://proceedings.neurips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html) NeurIPS 2017.
+
+ ## (Financial) Big Data
+
+ + [BloombergGPT] [BloombergGPT: A Large Language Model for Finance](https://arxiv.org/abs/2303.17564)
+
+ + [WHAT’S IN MY AI?](https://lifearchitect.ai/whats-in-my-ai/) A comprehensive analysis of the datasets used to train GPT-1, GPT-2, GPT-3, GPT-NeoX-20B, Megatron-11B, MT-NLG, and Gopher.
+
+ + [FinRL-Meta Repo](https://github.com/AI4Finance-Foundation/FinRL-Meta) and paper [FinRL-Meta: Market Environments and Benchmarks for Data-Driven Financial Reinforcement Learning](https://proceedings.neurips.cc/paper_files/paper/2022/hash/0bf54b80686d2c4dc0808c2e98d430f7-Abstract-Datasets_and_Benchmarks.html). Advances in Neural Information Processing Systems, 2022.
+
+ + [AI4Finance] [FinNLP](https://github.com/AI4Finance-Foundation/FinNLP): Democratizing Internet-scale financial data.
+
+ ## Interesting Demos
+
+ + [GPT-3 Creative Fiction](https://gwern.net/gpt-3#prompts-as-programming) Creative writing by OpenAI’s GPT-3 model, demonstrating poetry, dialogue, puns, literary parodies, and storytelling, plus advice on effective GPT-3 prompt programming and avoiding common errors.
+
+ ## ChatGPT for FinTech
+
+ **ChatGPT Trading Bot**
+ + [YouTube video] [ChatGPT Trading strategy 20097% returns](https://www.youtube.com/watch?v=unsa_gXPAJ4)
+ + [YouTube video] [ChatGPT Coding - Make A Profitable Trading Strategy In Five Minutes!](https://www.youtube.com/watch?v=4SG2884RcDY)
+ + [YouTube video] [Easy Automated Live Trading using ChatGPT (+9660.3% hands free)](https://www.youtube.com/watch?v=dIEZVPVOZPQ)
+ + [YouTube video] [ChatGPT Trading Strategy 893% Returns](https://www.youtube.com/watch?v=YxjvjK5AD2M)
+ + [YouTube video] [ChatGPT 10 Million Trading Strategy](https://www.youtube.com/watch?v=9VPfd08uU4Q)
+ + [YouTube video] [ChatGPT: Your Crypto Assistant](https://www.youtube.com/watch?v=LpzeshX6s2w)
+ + [YouTube video] [Generate Insane Trading Returns with ChatGPT and TradingView](https://www.youtube.com/watch?v=ekz6ugJE1h0&t=3s)
+
+ <!---
+ **(Fast and accurate) Sentiment Analysis**
+
+ GPT-3 can help analyze customer surveys and social media tweets from customers/users.
+
+ Tweets
+ + [Tweet Classifier](https://platform.openai.com/playground/p/default-tweet-classifier?model=text-davinci-003)
+ + [Advanced Tweet Classifier](https://platform.openai.com/playground/p/default-adv-tweet-classifier?model=text-davinci-003)
+
+ Financial News
+ + [Algorithmic Trading using Sentiment Analysis on News Articles](https://towardsdatascience.com/https-towardsdatascience-com-algorithmic-trading-using-sentiment-analysis-on-news-articles-83db77966704)
+ + [Accessing Historical Financial News Headlines with Python](https://python.plainenglish.io/access-historical-financial-news-headlines-with-python-be1b8faaea9f)
+
+ **PromptNet** By analogy to ImageNet and WordNet, it is critical to build a PromptNet.
+
+ + [Awesome_Prompting_Papers_in_Computer_Vision](https://github.com/ttengwang/Awesome_Prompting_Papers_in_Computer_Vision)
+ + [OpenPrompt](https://github.com/thunlp/OpenPrompt)
+ + [promptsource](https://github.com/bigscience-workshop/promptsource)
+
+ **Robo-advisor**
+
+ **Coding-tutor**
+
+ + [Hey, ChatGPT! Explain FinRL code to me!](https://medium.com/@ai4finance/hey-chatgpt-explain-finrl-code-to-me-6a91d612296f)
+
+ **Blogs about ChatGPT for FinTech**
+
+ ## ChatGPT APIs
+
+ Prompting as a new programming paradigm!
+ + [Towards Data Science] [GPT-3: Creative Potential of NLP](https://towardsdatascience.com/gpt-3-creative-potential-of-nlp-d5ccae16c1ab)
+ + [YouTube video] [OpenAI GPT-3 - Prompt Engineering For Financial NLP](https://www.youtube.com/watch?v=Nl2Cdbao5Ws)
+
+ + [OpenAI API for GPT-3](https://platform.openai.com/docs/models/gpt-3)
+ + [ChatGPT-wrapper: python and shell](https://github.com/mmabrouk/chatgpt-wrapper)
+ + [OpenAI Examples Library](https://platform.openai.com/examples)
+ + [GPT-3 Sandbox (Github)](https://github.com/shreyashankar/gpt3-sandbox) Enables users to create cool web demos using the OpenAI GPT-3 API.
+ + [Exploring the Capabilities of the ChatGPT API: A Beginner’s Guide](https://levelup.gitconnected.com/exploring-the-capabilities-of-the-chatgpt-api-a-beginners-guide-e9089d49961f)
+ + [Reverse engineered ChatGPT API](https://github.com/acheong08/ChatGPT)
+
+ **Prompting programming**
+
+ ## ChatGPT relatives
+
+ [A Release Timeline](https://github.com/osanseviero/ml_timeline) of many LLMs.
+
+ [PaLM](https://arxiv.org/abs/2204.02311)
+
+ [Chinchilla](https://arxiv.org/abs/2203.15556)
+
+ Interesting evaluations:
+ + [RLHF for pretraining](https://arxiv.org/abs/2302.08582)
+
+ + [Compare ChatGPT with GPT-3.5](https://arxiv.org/pdf/2302.06476.pdf)
+
+ + [Is ChatGPT A Good Translator? A Preliminary Study](https://arxiv.org/pdf/2301.08745.pdf)
+
+ + [A Multitask, Multilingual, Multimodal Evaluation of ChatGPT on Reasoning, Hallucination, and Interactivity](https://arxiv.org/pdf/2302.04023.pdf)
+
+ [YouTube video] [Physics Solution: ChatGPT vs. Google](https://www.youtube.com/watch?v=x4dIx9VYQoM)
+ --->
+
+ ## Citing FinGPT
+ ```
+ @article{yang2023fingpt,
+   title={FinGPT: Open-Source Financial Large Language Models},
+   author={Yang, Hongyang and Liu, Xiao-Yang and Wang, Christina Dan},
+   journal={FinLLM Symposium at IJCAI 2023},
+   year={2023}
+ }
+ @article{zhang2023instructfingpt,
+   title={Instruct-FinGPT: Financial Sentiment Analysis by Instruction Tuning of General-Purpose Large Language Models},
+   author={Zhang, Boyu and Yang, Hongyang and Liu, Xiao-Yang},
+   journal={FinLLM Symposium at IJCAI 2023},
+   year={2023}
+ }
+ @article{zhang2023fingptrag,
+   title={Enhancing Financial Sentiment Analysis via Retrieval Augmented Large Language Models},
+   author={Zhang, Boyu and Yang, Hongyang and Zhou, Tianyu and Babar, Ali and Liu, Xiao-Yang},
+   journal={ACM International Conference on AI in Finance (ICAIF)},
+   year={2023}
+ }
+ @article{wang2023fingptbenchmark,
+   title={FinGPT: Instruction Tuning Benchmark for Open-Source Large Language Models in Financial Datasets},
+   author={Wang, Neng and Yang, Hongyang and Wang, Christina Dan},
+   journal={NeurIPS Workshop on Instruction Tuning and Instruction Following},
+   year={2023}
+ }
+ @article{2023finnlp,
+   title={Data-centric FinGPT: Democratizing Internet-scale Data for Financial Large Language Models},
+   author={Liu, Xiao-Yang and Wang, Guoxuan and Yang, Hongyang and Zha, Daochen},
+   journal={NeurIPS Workshop on Instruction Tuning and Instruction Following},
+   year={2023}
+ }
+ ```
+
+ <div align="center">
+ <a href="https://finllm.github.io/workshop/#/fcb" target="_blank">
+ <img align="center" src="figs/fingpt_best_presentation.png" width="65%">
+ </a>
+ </div>
+
+
+ ## LICENSE
+
+ MIT License
+
+ **Disclaimer: We are sharing this code for academic purposes under the MIT License. Nothing herein is financial advice, and it is NOT a recommendation to trade real money. Please use common sense and always consult a professional before trading or investing.**
+
external/FinGPT/fingpt/FinGPT_Benchmark/__init__.py ADDED
@@ -0,0 +1,2 @@
+ from .data.download import download as download_datasets
+ from . import benchmarks
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from . import fpb, fiqa, finred, fineval, convfinqa, headline, ner, nwgi, tfns
+
+ __all__ = ['fpb', 'fiqa', 'finred', 'fineval', 'convfinqa', 'headline', 'ner', 'nwgi', 'tfns']
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/benchmarks.py ADDED
@@ -0,0 +1,114 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import PeftModel, get_peft_model, LoraConfig, TaskType  # peft==0.4.0
+ import torch
+ import argparse
+
+
+ from fpb import test_fpb, test_fpb_mlt
+ from fiqa import test_fiqa, test_fiqa_mlt
+ from tfns import test_tfns
+ from nwgi import test_nwgi
+ from headline import test_headline
+ from ner import test_ner
+ from convfinqa import test_convfinqa
+ from fineval import test_fineval
+ from finred import test_re
+
+
+ import sys
+ sys.path.append('../')
+ from utils import *
+
+
+ def main(args):
+     if args.from_remote:
+         model_name = parse_model_name(args.base_model, args.from_remote)
+     else:
+         model_name = '../' + parse_model_name(args.base_model)
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name, trust_remote_code=True,
+         # load_in_8bit=True,
+         device_map="auto",
+         # fp16=True,
+     )
+     model.model_parallel = True
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+     # tokenizer.pad_token_id = tokenizer.eos_token_id
+
+     # Decoder-only models need left padding for batched generation.
+     tokenizer.padding_side = "left"
+     if args.base_model == 'qwen':
+         tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids('<|endoftext|>')
+         tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids('<|extra_0|>')
+     if not tokenizer.pad_token or tokenizer.pad_token_id == tokenizer.eos_token_id:
+         tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+         model.resize_token_embeddings(len(tokenizer))
+
+     print(f'pad: {tokenizer.pad_token_id}, eos: {tokenizer.eos_token_id}')
+
+     # peft_config = LoraConfig(
+     #     task_type=TaskType.CAUSAL_LM,
+     #     inference_mode=False,
+     #     r=8,
+     #     lora_alpha=32,
+     #     lora_dropout=0.1,
+     #     target_modules=lora_module_dict[args.base_model],
+     #     bias='none',
+     # )
+     # model = get_peft_model(model, peft_config)
+     # model.load_state_dict(torch.load(args.peft_model + '/pytorch_model.bin'))
+
+     model = PeftModel.from_pretrained(model, args.peft_model)
+     model = model.eval()
+
+     with torch.no_grad():
+         for data in args.dataset.split(','):
+             if data == 'fpb':
+                 test_fpb(args, model, tokenizer)
+             elif data == 'fpb_mlt':
+                 test_fpb_mlt(args, model, tokenizer)
+             elif data == 'fiqa':
+                 test_fiqa(args, model, tokenizer)
+             elif data == 'fiqa_mlt':
+                 test_fiqa_mlt(args, model, tokenizer)
+             elif data == 'tfns':
+                 test_tfns(args, model, tokenizer)
+             elif data == 'nwgi':
+                 test_nwgi(args, model, tokenizer)
+             elif data == 'headline':
+                 test_headline(args, model, tokenizer)
+             elif data == 'ner':
+                 test_ner(args, model, tokenizer)
+             elif data == 'convfinqa':
+                 test_convfinqa(args, model, tokenizer)
+             elif data == 'fineval':
+                 test_fineval(args, model, tokenizer)
+             elif data == 're':
+                 test_re(args, model, tokenizer)
+             else:
+                 raise ValueError('undefined dataset.')
+
+     print('Evaluation Ends.')
+
+
+ if __name__ == "__main__":
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--dataset", required=True, type=str)
+     parser.add_argument("--base_model", required=True, type=str, choices=['chatglm2', 'llama2', 'llama2-13b', 'llama2-13b-nr', 'baichuan', 'falcon', 'internlm', 'qwen', 'mpt', 'bloom'])
+     parser.add_argument("--peft_model", required=True, type=str)
+     parser.add_argument("--max_length", default=512, type=int)
+     parser.add_argument("--batch_size", default=4, type=int, help="The eval batch size per device")
+     parser.add_argument("--instruct_template", default='default')
+     # argparse's type=bool treats any non-empty string as True; parse the value explicitly.
+     parser.add_argument("--from_remote", default=False, type=lambda x: str(x).lower() in ('true', '1'))
+
+     args = parser.parse_args()
+
+     print(args.base_model)
+     print(args.peft_model)
+
+     main(args)
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/convfinqa.py ADDED
@@ -0,0 +1,75 @@
+ from seqeval.metrics import accuracy_score
+ from datasets import load_dataset, load_from_disk
+ from tqdm import tqdm
+ import datasets
+ import torch
+ from torch.utils.data import DataLoader
+ from functools import partial
+ import re
+ import sys
+ import numpy as np
+ from fingpt.FinGPT_Benchmark.utils import *
+ from pathlib import Path
+ sys.path.append('../')
+
+
+ def cvt_text_to_pred(text):
+     if not text:
+         return 'nan'
+     # Extract the first signed integer or decimal from the generated text.
+     pred_match = re.search(r'-?\d+(\.\d+)?', text)
+     if pred_match is not None:
+         pred = pred_match.group()
+     else:
+         print(text)
+         pred = '0.0'
+     return pred
+
+
+ def map_output(feature):
+     label = cvt_text_to_pred(feature['output'])
+     pred = cvt_text_to_pred(feature['out_text'])
+     return {'label': label, 'pred': pred}
+
+
+ def test_convfinqa(args, model, tokenizer):
+     # Evaluation entry point imported by benchmarks.py.
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/fingpt-convfinqa')['test']
+     dataset = dataset.map(partial(test_mapping, args), load_from_cache_file=False)
+
+     def collate_fn(batch):
+         inputs = tokenizer(
+             [f["prompt"] for f in batch], return_tensors='pt',
+             padding=True, max_length=args.max_length,
+             return_token_type_ids=False
+         )
+         return inputs
+
+     dataloader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collate_fn, shuffle=False)
+
+     out_text_list = []
+     log_interval = max(len(dataloader) // 5, 1)  # avoid modulo-by-zero on tiny datasets
+
+     for idx, inputs in enumerate(tqdm(dataloader)):
+         inputs = {key: value.to(model.device) for key, value in inputs.items()}
+         res = model.generate(**inputs, max_length=args.max_length, eos_token_id=tokenizer.eos_token_id)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         if (idx + 1) % log_interval == 0:
+             tqdm.write(f'{idx}: {res_sentences[0]}')
+         out_text = [o.split("Answer: ")[1] if "Answer: " in o else "" for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+
+     dataset = dataset.add_column("out_text", out_text_list)
+     dataset = dataset.map(map_output, load_from_cache_file=False)
+     dataset = dataset.filter(lambda x: x['pred'] != 'nan')
+     dataset = dataset.to_pandas()
+
+     print(dataset)
+     dataset.to_csv('tmp.csv')
+
+     label = [float(d) for d in dataset['label']]
+     pred = [float(d) for d in dataset['pred']]
+
+     print('Accuracy: ', accuracy_score(label, pred))
+
+     return dataset
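
As a quick sanity check of the numeric-extraction convention used above in `cvt_text_to_pred`, this toy snippet (assumed strings, not real benchmark output) applies the same pattern outside the evaluation harness:

```python
import re

def extract_number(text):
    # First signed integer or decimal in the text; '0.0' if none is found.
    m = re.search(r'-?\d+(\.\d+)?', text or '')
    return m.group() if m else '0.0'

outputs = ["21.5", "roughly 3 percent", "no number here"]  # assumed model outputs
print([extract_number(o) for o in outputs])                # ['21.5', '3', '0.0']
```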
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/evaluate.sh ADDED
@@ -0,0 +1,395 @@
+ # export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
+ # export TOKENIZERS_PARALLELISM=0
+
+
+
+
+ #---- Relation Extraction ----
+
+ python benchmarks.py \
+ --dataset re \
+ --base_model llama2 \
+ --peft_model ../finetuned_models/finred-llama2-linear_202310012254 \
+ --batch_size 8 \
+ --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/finred-chatglm2-linear_202310010213 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/finred-qwen-linear_202310010502 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model mpt \
+ # --peft_model ../finetuned_models/finred-mpt-linear_202310010641 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/finred-bloom-linear_202310010741 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/finred-falcon-linear_202310010333 \
+ # --batch_size 1 \
+ # --max_length 512
+
+
+ #---- Generalization ----
+
+
+ # python benchmarks.py \
+ # --dataset fiqa_mlt \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/GRCLS-sentiment-falcon-linear-small_202309291801/checkpoint-300 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fpb_mlt \
+ # --base_model llama2 \
+ # --peft_model ../finetuned_models/GRCLS-sentiment-llama2-linear-small_202309290356/checkpoint-800 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fiqa_mlt \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/GRCLS-sentiment-qwen-linear-small_202309292115/checkpoint-700 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fpb_mlt \
+ # --base_model mpt \
+ # --peft_model ../finetuned_models/GRCLS-sentiment-mpt-linear-small_202309300359/checkpoint-400 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fiqa_mlt \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/GRCLS-sentiment-chatglm2-linear-1e-4lr_202309280440/checkpoint-212 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fiqa_mlt \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/GRCLS-sentiment-bloom-linear-small_202309300044/checkpoint-500 \
+ # --batch_size 8 \
+ # --max_length 512
+
+
+
+
+ #---- Multi-Task ----
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/MT-chatglm2-linear_202309201120 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/MT-falcon-linear_202309210126 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/MT-bloom-linear_202309211510 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/MT-qwen-linear_202309221011 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model mpt \
+ # --peft_model ../finetuned_models/MT-mpt-linear_202309230221 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset re \
+ # --base_model llama2 \
+ # --peft_model ../finetuned_models/MT-llama2-linear_202309241345 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi,headline,ner,re \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/MT-chatglm2-linear_202309201120 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi,headline,ner,re \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/MT-falcon-linear_202309210126 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi,headline,ner,re \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/MT-bloom-linear_202309211510 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi,headline,ner,re \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/MT-qwen-linear_202309221011 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi,headline,ner,re \
+ # --base_model mpt \
+ # --peft_model ../finetuned_models/MT-mpt-linear_202309230221 \
+ # --batch_size 8 \
+ # --max_length 512
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi,headline,ner,re \
+ # --base_model llama2 \
+ # --peft_model ../finetuned_models/MT-llama2-linear_202309241345 \
+ # --batch_size 8 \
+ # --max_length 512
+
+
+ #---- ConvFinQA ----
+
+ # python benchmarks.py \
+ # --dataset convfinqa \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/convfinqa-falcon-linear_202309170614 \
+ # --batch_size 1 \
+ # --max_length 2048
+
+ # python benchmarks.py \
+ # --dataset convfinqa \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/convfinqa-chatglm2-linear_202309170247 \
+ # --batch_size 1 \
+ # --max_length 2048
+
+ # python benchmarks.py \
+ # --dataset convfinqa \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/convfinqa-qwen-linear_202309171029 \
+ # --batch_size 1 \
+ # --max_length 2048
+
+ # python benchmarks.py \
+ # --dataset convfinqa \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/convfinqa-bloom-linear_202309171502 \
+ # --batch_size 1 \
+ # --max_length 2048
+
+ # python benchmarks.py \
+ # --dataset convfinqa \
+ # --base_model llama2 \
+ # --peft_model ../finetuned_models/convfinqa-llama2-linear_202309162205 \
+ # --batch_size 1 \
+ # --max_length 2048
+
+
+ #---- FinEval ----
+
+ # python benchmarks.py \
+ # --dataset fineval \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/fineval-falcon-linear_202309220409 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset fineval \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/fineval-chatglm2-linear_202309220332 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset fineval \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/fineval-qwen-linear_202309220508 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset fineval \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/fineval-bloom-linear_202309220639 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset fineval \
+ # --base_model mpt \
+ # --peft_model ../finetuned_models/fineval-mpt-linear_202309220555 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset fineval \
+ # --base_model llama2 \
+ # --peft_model ../finetuned_models/fineval-llama2-linear_202309192232 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset fineval \
+ # --base_model internlm \
+ # --peft_model ../finetuned_models/fineval-internlm-linear_202309211248 \
+ # --batch_size 1
+
+
+ #---- NER ----
+
+ # python benchmarks.py \
+ # --dataset ner \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/ner-falcon-linear_202309160320 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset ner \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/ner-chatglm2-linear_202309160238 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset ner \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/ner-qwen-linear_202309160409 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset ner \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/ner-bloom-linear_202309160530 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset ner \
+ # --base_model mpt \
+ # --peft_model ../finetuned_models/ner-mpt-linear_202309160459 \
+ # --batch_size 1
+
+ # python benchmarks.py \
+ # --dataset ner \
+ # --base_model llama2 \
+ # --peft_model ../finetuned_models/ner-llama2-linear_202309161924 \
+ # --batch_size 1
+
+ #---- sentiment analysis ----
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi \
+ # --base_model llama2 \
+ # --peft_model ../finetuned_models/sentiment-llama2-linear_202309130723 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/sentiment-falcon-default_20230911055454 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/sentiment-chatglm2-default_20230910031650 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/sentiment-qwen-linear_202309132016 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi \
+ # --base_model internlm \
+ # --peft_model ../finetuned_models/sentiment-internlm-linear_202309130230 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/sentiment-bloom-linear_202309151934 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset fpb,fiqa,tfns,nwgi \
+ # --base_model mpt \
+ # --peft_model ../finetuned_models/sentiment-mpt-linear_202309151405 \
+ # --batch_size 8
+
+
+ #---- headline ----
+
+ # python benchmarks.py \
+ # --dataset headline \
+ # --base_model llama2 \
+ # --peft_model ../finetuned_models/headline-llama2-linear_202309140611 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset headline \
+ # --base_model chatglm2 \
+ # --peft_model ../finetuned_models/headline-chatglm2-linear_202309140941 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset headline \
+ # --base_model internlm \
+ # --peft_model ../finetuned_models/headline-internlm-linear_202309140308 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset headline \
+ # --base_model falcon \
+ # --peft_model ../finetuned_models/headline-falcon-linear_202309141852 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset headline \
+ # --base_model qwen \
+ # --peft_model ../finetuned_models/headline-qwen-linear_202309142156 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset headline \
+ # --base_model mpt \
+ # --peft_model ../finetuned_models/headline-mpt-linear_202309150151 \
+ # --batch_size 8
+
+ # python benchmarks.py \
+ # --dataset headline \
+ # --base_model bloom \
+ # --peft_model ../finetuned_models/headline-bloom-linear_202309151641 \
+ # --batch_size 8
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/fineval.py ADDED
@@ -0,0 +1,72 @@
+ from seqeval.metrics import accuracy_score
+ from datasets import load_dataset, load_from_disk
+ from tqdm import tqdm
+ import datasets
+ import torch
+ from torch.utils.data import DataLoader
+ from functools import partial
+ import re
+ import sys
+ import numpy as np
+ from fingpt.FinGPT_Benchmark.utils import *
+ from pathlib import Path
+ sys.path.append('../')
+
+
+ def cvt_text_to_pred(text):
+     # Map a generated answer like "B" to its option index (A->0 ... D->3); -1 if absent.
+     pred_match = re.search(r'[ABCD]', text)
+     if pred_match is not None:
+         pred = pred_match.group()
+         pred = ["A", "B", "C", "D"].index(pred)
+     else:
+         pred = -1
+     return pred
+
+
+ def map_output(feature):
+     label = cvt_text_to_pred(feature['output'])
+     pred = cvt_text_to_pred(feature['out_text'])
+     return {'label': label, 'pred': pred}
+
+
+ def test_fineval(args, model, tokenizer):
+
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/fingpt-fineval')['test']
+     dataset = dataset.map(partial(test_mapping, args), load_from_cache_file=False)
+
+     def collate_fn(batch):
+         inputs = tokenizer(
+             [f["prompt"] for f in batch], return_tensors='pt',
+             padding=True, max_length=args.max_length,
+             return_token_type_ids=False
+         )
+         return inputs
+
+     dataloader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collate_fn, shuffle=False)
+
+     out_text_list = []
+     log_interval = max(len(dataloader) // 5, 1)  # avoid modulo-by-zero on tiny datasets
+
+     for idx, inputs in enumerate(tqdm(dataloader)):
+         inputs = {key: value.to(model.device) for key, value in inputs.items()}
+         res = model.generate(**inputs, max_length=args.max_length, eos_token_id=tokenizer.eos_token_id)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         if (idx + 1) % log_interval == 0:
+             tqdm.write(f'{idx}: {res_sentences[0]}')
+         out_text = [o.split("Answer: ")[1] for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+
+     dataset = dataset.add_column("out_text", out_text_list)
+     dataset = dataset.map(map_output, load_from_cache_file=False)
+     dataset = dataset.to_pandas()
+
+     print(dataset)
+     dataset.to_csv('tmp.csv')
+
+     print('Accuracy:', accuracy_score(dataset['label'], dataset['pred']))
+
+     return dataset
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/finred.py ADDED
@@ -0,0 +1,150 @@
+ from seqeval.metrics import classification_report
+ from datasets import load_dataset, load_from_disk
+ from tqdm import tqdm
+ import datasets
+ import torch
+ from torch.utils.data import DataLoader
+ from functools import partial
+ import re
+ import sys
+ import numpy as np
+ from fingpt.FinGPT_Benchmark.utils import *
+ from pathlib import Path
+ sys.path.append('../')
+
+
+ relations = [
+     'product_or_material_produced',
+     'manufacturer',
+     'distributed_by',
+     'industry',
+     'position_held',
+     'original_broadcaster',
+     'owned_by',
+     'founded_by',
+     'distribution_format',
+     'headquarters_location',
+     'stock_exchange',
+     'currency',
+     'parent_organization',
+     'chief_executive_officer',
+     'director_/_manager',
+     'owner_of',
+     'operator',
+     'member_of',
+     'employer',
+     'chairperson',
+     'platform',
+     'subsidiary',
+     'legal_form',
+     'publisher',
+     'developer',
+     'brand',
+     'business_division',
+     'location_of_formation',
+     'creator',
+ ]
+
+
+ def cvt_text_to_pred(ref, text):
+     # Parse "relation: word1, word2; ..." outputs into (relation, word1, word2) triples,
+     # keeping only known relations whose arguments appear in the reference text.
+     preds = []
+     for pred_txt in text.strip('.').split(';'):
+         pred_match = re.match(r'^(.*):(.*),(.*)$', pred_txt)
+         if pred_match is not None:
+             relation, word1, word2 = pred_match.group(1).strip(), pred_match.group(2).strip(), pred_match.group(3).strip()
+             if relation in relations and word1 in ref and word2 in ref:
+                 preds.append((relation, word1, word2))
+             else:
+                 print("Not found Error: ", relation, word1, word2, ref)
+         else:
+             print("Parse Error: ", pred_txt)
+
+     return preds
+
+
+ def map_output(feature):
+     ref = feature['input']
+     label = cvt_text_to_pred(ref, feature['output'])
+     pred = cvt_text_to_pred(ref, feature['out_text'])
+     return {'label': label, 'pred': pred}
+
+
+ def calc_metric(gt_list, pred_list):
+     # Initialize counters for true positives, false positives, and false negatives
+     true_positives = 0
+     false_positives = 0
+     false_negatives = 0
+
+     for (ground_truth, predicted_relations) in zip(gt_list, pred_list):
+         # Count true positives and false positives
+         for relation in predicted_relations:
+             if relation in ground_truth:
+                 true_positives += 1
+             else:
+                 false_positives += 1
+
+         # Count false negatives
+         for relation in ground_truth:
+             if relation not in predicted_relations:
+                 false_negatives += 1
+
+     # Calculate precision, recall, and F1-score, guarding against empty denominators
+     precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) else 0.0
+     recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) else 0.0
+     f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
+
+     # Print the results
+     print("Precision:", precision)
+     print("Recall:", recall)
+     print("F1-Score:", f1_score)
+
+
+ def test_re(args, model, tokenizer):
+
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/fingpt-finred-re')['test']
+     dataset = dataset.train_test_split(0.2, seed=42)['test']
+     dataset = dataset.map(partial(test_mapping, args), load_from_cache_file=False)
+
+     def collate_fn(batch):
+         inputs = tokenizer(
+             [f["prompt"] for f in batch], return_tensors='pt',
+             padding=True, max_length=args.max_length,
+             return_token_type_ids=False
+         )
+         return inputs
+
+     dataloader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collate_fn, shuffle=False)
+
+     out_text_list = []
+     log_interval = max(len(dataloader) // 5, 1)  # avoid modulo-by-zero on tiny datasets
+
+     for idx, inputs in enumerate(tqdm(dataloader)):
+         inputs = {key: value.to(model.device) for key, value in inputs.items()}
+         # max_new_tokens takes precedence over max_length when both are given
+         res = model.generate(**inputs, max_length=args.max_length, eos_token_id=tokenizer.eos_token_id, max_new_tokens=128)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         if (idx + 1) % log_interval == 0:
+             tqdm.write(f'{idx}: {res_sentences[0]}')
+         out_text = [o.split("Answer: ")[1] for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+
+     dataset = dataset.add_column("out_text", out_text_list)
+     dataset = dataset.map(map_output, load_from_cache_file=False)
+     dataset = dataset.to_pandas()
+
+     print(dataset)
+     dataset.to_csv('tmp.csv')
+
+     label = [[tuple(t) for t in d.tolist()] for d in dataset['label']]
+     pred = [[tuple(t) for t in d.tolist()] for d in dataset['pred']]
+
+     label_re = [[t[0] for t in d.tolist()] for d in dataset['label']]
+     pred_re = [[t[0] for t in d.tolist()] for d in dataset['pred']]
+
+     calc_metric(label, pred)
+
+     calc_metric(label_re, pred_re)
+
+     return dataset
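
To make `calc_metric` concrete with assumed toy data: one ground-truth triple {(owned_by, ViiV Healthcare, GSK)} against two predictions gives TP=1, FP=1, FN=0, i.e. precision 0.5, recall 1.0, F1 ≈ 0.667. A minimal re-computation sketch:

```python
gt   = [[('owned_by', 'ViiV Healthcare', 'GSK')]]                         # assumed labels
pred = [[('owned_by', 'ViiV Healthcare', 'GSK'),
         ('manufacturer', 'Desano', 'ViiV Healthcare')]]                  # assumed predictions

tp = sum(r in g for g, p in zip(gt, pred) for r in p)                     # 1
fp = sum(r not in g for g, p in zip(gt, pred) for r in p)                 # 1
fn = sum(r not in p for g, p in zip(gt, pred) for r in g)                 # 0
precision, recall = tp / (tp + fp), tp / (tp + fn)
print(precision, recall, 2 * precision * recall / (precision + recall))   # 0.5 1.0 0.666...
```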
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/fiqa.py ADDED
@@ -0,0 +1,176 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ from sklearn.metrics import accuracy_score, f1_score
+ from datasets import load_dataset, load_from_disk, Dataset
+ from tqdm import tqdm
+ import datasets
+ import torch
+
+ from torch.utils.data import DataLoader
+ from functools import partial
+ from pathlib import Path
+
+
+ with open(Path(__file__).parent / 'sentiment_templates.txt') as f:
+     templates = [l.strip() for l in f.readlines()]
+
+
+ def format_example(example: dict) -> dict:
+     context = f"Instruction: {example['instruction']}\n"
+     if example.get("input"):
+         context += f"Input: {example['input']}\n"
+     context += "Answer: "
+     target = example["output"]
+     return {"context": context, "target": target}
+
+ def add_instructions(x):
+     if x.format == "post":
+         return "What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}."
+     else:
+         return "What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}."
+
+ def make_label(x):
+     if x < -0.1: return "negative"
+     elif -0.1 <= x < 0.1: return "neutral"
+     elif x >= 0.1: return "positive"
+
+ def change_target(x):
+     if 'positive' in x or 'Positive' in x:
+         return 'positive'
+     elif 'negative' in x or 'Negative' in x:
+         return 'negative'
+     else:
+         return 'neutral'
+
+ def vote_output(x):
+     # Majority vote over the per-template outputs; ties fall back to neutral.
+     output_dict = {'positive': 0, 'negative': 0, 'neutral': 0}
+     for i in range(len(templates)):
+         pred = change_target(x[f'out_text_{i}'].lower())
+         output_dict[pred] += 1
+     if output_dict['positive'] > output_dict['negative']:
+         return 'positive'
+     elif output_dict['negative'] > output_dict['positive']:
+         return 'negative'
+     else:
+         return 'neutral'
+
+
+ def test_fiqa(args, model, tokenizer, prompt_fun=add_instructions):
+     batch_size = args.batch_size
+     # dataset = load_dataset('pauri32/fiqa-2018')
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/fiqa-2018/')
+     dataset = datasets.concatenate_datasets([dataset["train"], dataset["validation"], dataset["test"]])
+     dataset = dataset.train_test_split(0.226, seed=42)['test']
+     dataset = dataset.to_pandas()
+     dataset["output"] = dataset.sentiment_score.apply(make_label)
+     if prompt_fun is None:
+         dataset["instruction"] = "What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}."
+     else:
+         dataset["instruction"] = dataset.apply(prompt_fun, axis=1)
+
+     dataset = dataset[['sentence', 'output', "instruction"]]
+     dataset.columns = ["input", "output", "instruction"]
+     dataset[["context", "target"]] = dataset.apply(format_example, axis=1, result_type="expand")
+
+     # print example
+     print(f"\n\nPrompt example:\n{dataset['context'][0]}\n\n")
+
+     context = dataset['context'].tolist()
+     total_steps = (dataset.shape[0] + batch_size - 1) // batch_size  # ceil division avoids an empty final batch
+     print(f"Total len: {len(context)}. Batchsize: {batch_size}. Total steps: {total_steps}")
+
+     out_text_list = []
+
+     for i in tqdm(range(total_steps)):
+         tmp_context = context[i * batch_size:(i + 1) * batch_size]
+         tokens = tokenizer(tmp_context, return_tensors='pt', padding=True, max_length=512, return_token_type_ids=False)
+         # tokens.pop('token_type_ids')
+         for k in tokens.keys():
+             tokens[k] = tokens[k].cuda()
+
+         res = model.generate(**tokens, max_length=512, eos_token_id=tokenizer.eos_token_id)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         tqdm.write(f'{i}: {res_sentences[0]}')
+         out_text = [o.split("Answer: ")[1] for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+
+     dataset["out_text"] = out_text_list
+     dataset["new_target"] = dataset["target"].apply(change_target)
+     dataset["new_out"] = dataset["out_text"].apply(change_target)
+
+     acc = accuracy_score(dataset["new_target"], dataset["new_out"])
+     f1_macro = f1_score(dataset["new_target"], dataset["new_out"], average="macro")
+     f1_micro = f1_score(dataset["new_target"], dataset["new_out"], average="micro")
+     f1_weighted = f1_score(dataset["new_target"], dataset["new_out"], average="weighted")
+
+     print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}.")
+
+     return dataset
+
+
+ def test_fiqa_mlt(args, model, tokenizer):
+     batch_size = args.batch_size
+     # dataset = load_dataset('pauri32/fiqa-2018')
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/fiqa-2018/')
+     dataset = datasets.concatenate_datasets([dataset["train"], dataset["validation"], dataset["test"]])
+     dataset = dataset.train_test_split(0.226, seed=42)['test']
+     dataset = dataset.to_pandas()
+     dataset["output"] = dataset.sentiment_score.apply(make_label)
+     dataset["text_type"] = dataset.apply(lambda x: 'tweet' if x.format == "post" else 'news', axis=1)
+     dataset = dataset[['sentence', 'output', "text_type"]]
+     dataset.columns = ["input", "output", "text_type"]
+
+     dataset["output"] = dataset["output"].apply(change_target)
+     dataset = dataset[dataset["output"] != 'neutral']
+
+     out_texts_list = [[] for _ in range(len(templates))]
+
+     def collate_fn(batch):
+         inputs = tokenizer(
+             [f["context"] for f in batch], return_tensors='pt',
+             padding=True, max_length=args.max_length,
+             return_token_type_ids=False
+         )
+         return inputs
+
+     for i, template in enumerate(templates):
+         dataset = dataset[['input', 'output', "text_type"]]
+         dataset["instruction"] = dataset['text_type'].apply(lambda x: template.format(type=x) + "\nOptions: positive, negative")
+         # dataset["instruction"] = dataset['text_type'].apply(lambda x: template.format(type=x) + "\nOptions: negative, positive")
+         dataset[["context", "target"]] = dataset.apply(format_example, axis=1, result_type="expand")
+
+         dataloader = DataLoader(Dataset.from_pandas(dataset), batch_size=args.batch_size, collate_fn=collate_fn, shuffle=False)
+
+         log_interval = max(len(dataloader) // 5, 1)
+
+         for idx, inputs in enumerate(tqdm(dataloader)):
+             inputs = {key: value.to(model.device) for key, value in inputs.items()}
+             res = model.generate(**inputs, do_sample=False, max_length=args.max_length, eos_token_id=tokenizer.eos_token_id)  # , max_new_tokens=10
+             res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+             tqdm.write(f'{idx}: {res_sentences[0]}')
+             # if (idx + 1) % log_interval == 0:
+             #     tqdm.write(f'{idx}: {res_sentences[0]}')
+             out_text = [o.split("Answer: ")[1] for o in res_sentences]
+             out_texts_list[i] += out_text
+             torch.cuda.empty_cache()
+
+     for i in range(len(templates)):
+         dataset[f"out_text_{i}"] = out_texts_list[i]
+         dataset[f"out_text_{i}"] = dataset[f"out_text_{i}"].apply(change_target)
+
+     dataset["new_out"] = dataset.apply(vote_output, axis=1, result_type="expand")
+
+     dataset.to_csv('tmp.csv')
+
+     for k in [f"out_text_{i}" for i in range(len(templates))] + ["new_out"]:
+         acc = accuracy_score(dataset["target"], dataset[k])
+         f1_macro = f1_score(dataset["target"], dataset[k], average="macro")
+         f1_micro = f1_score(dataset["target"], dataset[k], average="micro")
+         f1_weighted = f1_score(dataset["target"], dataset[k], average="weighted")
+
+         print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}.")
+
+     return dataset
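
A small self-contained illustration of the labeling and voting conventions above, using three assumed per-template generations for a single example (toy stand-ins, not the module's functions):

```python
def make_label(score):
    # Same thresholds as above: < -0.1 negative, >= 0.1 positive, else neutral.
    return "negative" if score < -0.1 else ("positive" if score >= 0.1 else "neutral")

def to_target(text):
    t = text.lower()
    return 'positive' if 'positive' in t else ('negative' if 'negative' in t else 'neutral')

outs = ["Positive.", "positive", "negative"]   # assumed outputs from three prompt templates
votes = [to_target(o) for o in outs]
print(make_label(0.42))                        # positive
print(max(set(votes), key=votes.count))        # positive (simple majority vote)
```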
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/fpb.py ADDED
@@ -0,0 +1,168 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ from sklearn.metrics import accuracy_score, f1_score
+ from datasets import load_dataset, load_from_disk, Dataset
+ from tqdm import tqdm
+ import datasets
+ import torch
+
+ from torch.utils.data import DataLoader
+ from functools import partial
+ from pathlib import Path
+
+ dic = {
+     0: "negative",
+     1: 'neutral',
+     2: 'positive',
+ }
+
+ with open(Path(__file__).parent / 'sentiment_templates.txt') as f:
+     templates = [l.strip() for l in f.readlines()]
+
+
+ def format_example(example: dict) -> dict:
+     context = f"Instruction: {example['instruction']}\n"
+     if example.get("input"):
+         context += f"Input: {example['input']}\n"
+     context += "Answer: "
+     target = example["output"]
+     return {"context": context, "target": target}
+
+ def change_target(x):
+     if 'positive' in x or 'Positive' in x:
+         return 'positive'
+     elif 'negative' in x or 'Negative' in x:
+         return 'negative'
+     else:
+         return 'neutral'
+
+
+ def vote_output(x):
+     # Majority vote over the per-template outputs; ties fall back to neutral.
+     output_dict = {'positive': 0, 'negative': 0, 'neutral': 0}
+     for i in range(len(templates)):
+         pred = change_target(x[f'out_text_{i}'].lower())
+         output_dict[pred] += 1
+     if output_dict['positive'] > output_dict['negative']:
+         return 'positive'
+     elif output_dict['negative'] > output_dict['positive']:
+         return 'negative'
+     else:
+         return 'neutral'
+
+ def test_fpb(args, model, tokenizer, prompt_fun=None):
+     batch_size = args.batch_size
+     # instructions = load_dataset("financial_phrasebank", "sentences_50agree")
+     instructions = load_from_disk(Path(__file__).parent.parent / "data/financial_phrasebank-sentences_50agree/")
+     instructions = instructions["train"]
+     instructions = instructions.train_test_split(seed=42)['test']
+     instructions = instructions.to_pandas()
+     instructions.columns = ["input", "output"]
+     instructions["output"] = instructions["output"].apply(lambda x: dic[x])
+
+     if prompt_fun is None:
+         instructions["instruction"] = "What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}."
+     else:
+         instructions["instruction"] = instructions.apply(prompt_fun, axis=1)
+
+     instructions[["context", "target"]] = instructions.apply(format_example, axis=1, result_type="expand")
+
+     # print example
+     print(f"\n\nPrompt example:\n{instructions['context'][0]}\n\n")
+
+     context = instructions['context'].tolist()
+
+     total_steps = (instructions.shape[0] + batch_size - 1) // batch_size  # ceil division avoids an empty final batch
+     print(f"Total len: {len(context)}. Batchsize: {batch_size}. Total steps: {total_steps}")
+
+     out_text_list = []
+     for i in tqdm(range(total_steps)):
+         tmp_context = context[i * batch_size:(i + 1) * batch_size]
+         tokens = tokenizer(tmp_context, return_tensors='pt', padding=True, max_length=512, return_token_type_ids=False)
+         for k in tokens.keys():
+             tokens[k] = tokens[k].cuda()
+         res = model.generate(**tokens, max_length=512, eos_token_id=tokenizer.eos_token_id)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         # print(f'{i}: {res_sentences[0]}')
+         out_text = [o.split("Answer: ")[1] for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+
+     instructions["out_text"] = out_text_list
+     instructions["new_target"] = instructions["target"].apply(change_target)
+     instructions["new_out"] = instructions["out_text"].apply(change_target)
+
+     acc = accuracy_score(instructions["new_target"], instructions["new_out"])
+     f1_macro = f1_score(instructions["new_target"], instructions["new_out"], average="macro")
+     f1_micro = f1_score(instructions["new_target"], instructions["new_out"], average="micro")
+     f1_weighted = f1_score(instructions["new_target"], instructions["new_out"], average="weighted")
+
+     print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}.")
+
+     return instructions
+
+
+ def test_fpb_mlt(args, model, tokenizer):
+     batch_size = args.batch_size
+     # dataset = load_dataset("financial_phrasebank", "sentences_50agree")
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/financial_phrasebank-sentences_50agree/')
+     dataset = dataset["train"]  # .select(range(300))
+     dataset = dataset.train_test_split(seed=42)['test']
+     dataset = dataset.to_pandas()
+     dataset.columns = ["input", "output"]
+     dataset["output"] = dataset["output"].apply(lambda x: dic[x])
+     dataset["text_type"] = dataset.apply(lambda x: 'news', axis=1)
+
+     dataset["output"] = dataset["output"].apply(change_target)
+     dataset = dataset[dataset["output"] != 'neutral']
+
+     out_texts_list = [[] for _ in range(len(templates))]
+
+     def collate_fn(batch):
+         inputs = tokenizer(
+             [f["context"] for f in batch], return_tensors='pt',
+             padding=True, max_length=args.max_length,
+             return_token_type_ids=False
+         )
+         return inputs
+
+     for i, template in enumerate(templates):
+         dataset = dataset[['input', 'output', "text_type"]]
+         dataset["instruction"] = dataset['text_type'].apply(lambda x: template.format(type=x) + "\nOptions: positive, negative")
+         # dataset["instruction"] = dataset['text_type'].apply(lambda x: template.format(type=x) + "\nOptions: negative, positive")
+         dataset[["context", "target"]] = dataset.apply(format_example, axis=1, result_type="expand")
+
+         dataloader = DataLoader(Dataset.from_pandas(dataset), batch_size=args.batch_size, collate_fn=collate_fn, shuffle=False)
+
+         log_interval = max(len(dataloader) // 5, 1)
+
+         for idx, inputs in enumerate(tqdm(dataloader)):
+             inputs = {key: value.to(model.device) for key, value in inputs.items()}
+             res = model.generate(**inputs, do_sample=False, max_length=args.max_length, eos_token_id=tokenizer.eos_token_id, max_new_tokens=10)
+             res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+             tqdm.write(f'{idx}: {res_sentences[0]}')
+             # if (idx + 1) % log_interval == 0:
+             #     tqdm.write(f'{idx}: {res_sentences[0]}')
148
+ out_text = [o.split("Answer: ")[1] for o in res_sentences]
149
+ out_texts_list[i] += out_text
150
+ torch.cuda.empty_cache()
151
+
152
+ for i in range(len(templates)):
153
+ dataset[f"out_text_{i}"] = out_texts_list[i]
154
+ dataset[f"out_text_{i}"] = dataset[f"out_text_{i}"].apply(change_target)
155
+
156
+ dataset["new_out"] = dataset.apply(vote_output, axis=1, result_type="expand")
157
+ dataset.to_csv('tmp.csv')
158
+
159
+ for k in [f"out_text_{i}" for i in range(len(templates))] + ["new_out"]:
160
+
161
+ acc = accuracy_score(dataset["target"], dataset[k])
162
+ f1_macro = f1_score(dataset["target"], dataset[k], average="macro")
163
+ f1_micro = f1_score(dataset["target"], dataset[k], average="micro")
164
+ f1_weighted = f1_score(dataset["target"], dataset[k], average="weighted")
165
+
166
+ print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}. ")
167
+
168
+ return dataset
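The multi-template path above stands or falls on two helpers: change_target normalizes a free-form generation to one of the three benchmark labels, and vote_output majority-votes the per-template predictions, with positive/negative ties falling back to neutral. A minimal self-contained sketch of that ensemble step (the toy row dict stands in for the real out_text_{i} DataFrame columns):

    def change_target(text):
        # Normalize a free-form generation to one of the three benchmark labels.
        t = text.lower()
        if 'positive' in t:
            return 'positive'
        if 'negative' in t:
            return 'negative'
        return 'neutral'

    row = {'out_text_0': 'Positive.', 'out_text_1': 'negative', 'out_text_2': 'The sentiment is positive'}
    votes = [change_target(row[f'out_text_{i}']) for i in range(3)]
    if votes.count('positive') > votes.count('negative'):
        result = 'positive'
    elif votes.count('negative') > votes.count('positive'):
        result = 'negative'
    else:
        result = 'neutral'
    print(result)  # -> positive (2 of 3 templates agree)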
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/headline.py ADDED
@@ -0,0 +1,84 @@
+ from sklearn.metrics import accuracy_score, f1_score, classification_report
+ from datasets import load_dataset, load_from_disk
+ from tqdm import tqdm
+ import datasets
+ import torch
+ from torch.utils.data import DataLoader
+ from functools import partial
+ from pathlib import Path
+ from fingpt.FinGPT_Benchmark.utils import *
+ 
+ import sys
+ sys.path.append('../')
+ 
+ 
+ 
+ def binary2multi(dataset):
+     pred, label = [], []
+     tmp_pred, tmp_label = [], []
+     for i, row in dataset.iterrows():
+         tmp_pred.append(row['pred'])
+         tmp_label.append(row['label'])
+         if (i + 1) % 9 == 0:  # the test set asks nine yes/no questions per headline
+             pred.append(tmp_pred)
+             label.append(tmp_label)
+             tmp_pred, tmp_label = [], []
+     return pred, label
+ 
+ 
+ def map_output(feature):
+     pred = 1 if 'yes' in feature['out_text'].lower() else 0
+     label = 1 if 'yes' in feature['output'].lower() else 0
+     return {'label': label, 'pred': pred}
+ 
+ 
+ def test_headline(args, model, tokenizer):
+ 
+     # dataset = load_from_disk('../data/fingpt-headline')['test']
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/fingpt-headline-instruct')['test']
+     dataset = dataset.map(partial(test_mapping, args), load_from_cache_file=False)
+ 
+     def collate_fn(batch):
+         inputs = tokenizer(
+             [f["prompt"] for f in batch], return_tensors='pt',
+             padding=True, max_length=args.max_length,
+             return_token_type_ids=False
+         )
+         return inputs
+ 
+     dataloader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collate_fn, shuffle=False)
+ 
+     out_text_list = []
+     log_interval = len(dataloader) // 5
+ 
+     for idx, inputs in enumerate(tqdm(dataloader)):
+         inputs = {key: value.to(model.device) for key, value in inputs.items()}
+         res = model.generate(**inputs, max_length=args.max_length, eos_token_id=tokenizer.eos_token_id)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         # a sample output is logged every log_interval batches below
+         if log_interval and (idx + 1) % log_interval == 0:  # guard avoids modulo-by-zero on tiny datasets
+             tqdm.write(f'{idx}: {res_sentences[0]}')
+         out_text = [o.split("Answer: ")[1] for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+ 
+     dataset = dataset.add_column("out_text", out_text_list)
+     dataset = dataset.map(map_output, load_from_cache_file=False)
+     dataset = dataset.to_pandas()
+ 
+     print(dataset)
+     dataset.to_csv('tmp.csv')
+ 
+     # binary
+     acc = accuracy_score(dataset["label"], dataset["pred"])
+     f1 = f1_score(dataset["label"], dataset["pred"], average="binary")
+ 
+     # multi-class
+     pred, label = binary2multi(dataset)
+ 
+     print(f"\n|| Acc: {acc} || F1 binary: {f1} ||\n")
+     print(classification_report(label, pred, digits=4, target_names=['price or not', 'price up', 'price stable',
+                                                                      'price down', 'price past', 'price future',
+                                                                      'event past', 'event future', 'asset comp']))
+ 
+     return dataset
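binary2multi above assumes the headline test set is serialized as nine consecutive yes/no questions per headline, so the flat per-question answers can be regrouped into one nine-way multilabel row each. A toy sketch of the regrouping (dummy 0/1 values):

    import numpy as np

    flat = [1, 0, 0, 1, 0, 0, 0, 0, 0,   # headline 1: answers to the nine questions
            0, 1, 0, 0, 0, 0, 0, 0, 1]   # headline 2
    grouped = np.array(flat).reshape(-1, 9)  # one multilabel row per headline
    print(grouped.shape)  # (2, 9), the shape sklearn's multilabel classification_report expects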
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/ner.py ADDED
@@ -0,0 +1,94 @@
+ from seqeval.metrics import classification_report
+ from datasets import load_dataset, load_from_disk
+ from tqdm import tqdm
+ import datasets
+ import torch
+ from torch.utils.data import DataLoader
+ from functools import partial
+ import re
+ import sys
+ import numpy as np
+ from fingpt.FinGPT_Benchmark.utils import *
+ from pathlib import Path
+ sys.path.append('../')
+ 
+ ent_dict = {
+     'PER': 'person',
+     'ORG': 'organization',
+     'LOC': 'location',
+ }
+ ent_dict_rev = {v: k for k, v in ent_dict.items()}
+ 
+ 
+ def cvt_text_to_pred(tokens, text):
+     # Convert "X is a person, Y is an organization" style answers back into BIO tags.
+     preds = ['O' for _ in range(len(tokens))]
+     for pred_txt in text.lower().strip('.').split(','):
+ 
+         pred_match = re.match(r'^(.*) is an? (.*)$', pred_txt)
+         if pred_match is not None:
+             entity, entity_type = pred_match.group(1).strip(), pred_match.group(2).strip()
+             entity_pred = ent_dict_rev.get(entity_type, 'O')
+             entity_tokens = entity.split()
+ 
+             n = len(entity_tokens)
+             for i in range(len(tokens) - n + 1):
+                 if tokens[i:i+n] == entity_tokens and preds[i:i+n] == ['O'] * n:
+                     preds[i:i+n] = ['B-' + entity_pred] + ['I-' + entity_pred] * (n-1)
+                     break
+         else:
+             print(pred_txt)
+ 
+     return preds
+ 
+ 
+ def map_output(feature):
+ 
+     tokens = feature['input'].lower().split()
+     label = cvt_text_to_pred(tokens, feature['output'])
+     pred = cvt_text_to_pred(tokens, feature['out_text'])
+ 
+     return {'label': label, 'pred': pred}
+ 
+ 
+ def test_ner(args, model, tokenizer):
+ 
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/fingpt-ner')['test']
+     dataset = dataset.map(partial(test_mapping, args), load_from_cache_file=False)
+ 
+     def collate_fn(batch):
+         inputs = tokenizer(
+             [f["prompt"] for f in batch], return_tensors='pt',
+             padding=True, max_length=args.max_length,
+             return_token_type_ids=False
+         )
+         return inputs
+ 
+     dataloader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collate_fn, shuffle=False)
+ 
+     out_text_list = []
+     log_interval = len(dataloader) // 5
+ 
+     for idx, inputs in enumerate(tqdm(dataloader)):
+         inputs = {key: value.to(model.device) for key, value in inputs.items()}
+         res = model.generate(**inputs, max_length=args.max_length, eos_token_id=tokenizer.eos_token_id)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         if log_interval and (idx + 1) % log_interval == 0:  # guard avoids modulo-by-zero on tiny datasets
+             tqdm.write(f'{idx}: {res_sentences[0]}')
+         out_text = [o.split("Answer: ")[1] for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+ 
+     dataset = dataset.add_column("out_text", out_text_list)
+     dataset = dataset.map(map_output, load_from_cache_file=False)
+     dataset = dataset.to_pandas()
+ 
+     print(dataset)
+     dataset.to_csv('tmp.csv')
+ 
+     label = [d.tolist() for d in dataset['label']]
+     pred = [d.tolist() for d in dataset['pred']]
+ 
+     print(classification_report(label, pred, digits=4))
+ 
+     return dataset
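cvt_text_to_pred is the pivot of this benchmark: it parses the model's natural-language answer back into token-level BIO tags so seqeval can score spans. A worked example of the round trip, using a trimmed re-implementation of the function above:

    import re

    ent_dict_rev = {'person': 'PER', 'organization': 'ORG', 'location': 'LOC'}

    def cvt_text_to_pred(tokens, text):
        # Trimmed mirror of the function above, for illustration only.
        preds = ['O'] * len(tokens)
        for pred_txt in text.lower().strip('.').split(','):
            m = re.match(r'^(.*) is an? (.*)$', pred_txt)
            if m is None:
                continue
            entity_tokens = m.group(1).strip().split()
            tag = ent_dict_rev.get(m.group(2).strip(), 'O')
            n = len(entity_tokens)
            for i in range(len(tokens) - n + 1):
                # Tag the first still-untagged occurrence of the entity span.
                if tokens[i:i+n] == entity_tokens and preds[i:i+n] == ['O'] * n:
                    preds[i:i+n] = ['B-' + tag] + ['I-' + tag] * (n - 1)
                    break
        return preds

    tokens = "bank agrees to lend to borrower".split()
    print(cvt_text_to_pred(tokens, "Bank is an organization, Borrower is a person."))
    # -> ['B-ORG', 'O', 'O', 'O', 'O', 'B-PER']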
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/nwgi.py ADDED
@@ -0,0 +1,86 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+ 
+ from sklearn.metrics import accuracy_score, f1_score
+ from datasets import load_dataset, load_from_disk
+ from tqdm import tqdm
+ import datasets
+ import torch
+ from pathlib import Path
+ 
+ dic = {
+     'strong negative': "negative",
+     'moderately negative': "negative",
+     'mildly negative': "neutral",
+     'strong positive': "positive",
+     'moderately positive': "positive",
+     'mildly positive': 'neutral',
+     'neutral': 'neutral',
+ }
+ 
+ def format_example(example: dict) -> dict:
+     context = f"Instruction: {example['instruction']}\n"
+     if example.get("input"):
+         context += f"Input: {example['input']}\n"
+     context += "Answer: "
+     target = example["output"]
+     return {"context": context, "target": target}
+ 
+ def change_target(x):
+     if 'positive' in x or 'Positive' in x:
+         return 'positive'
+     elif 'negative' in x or 'Negative' in x:
+         return 'negative'
+     else:
+         return 'neutral'
+ 
+ def test_nwgi(args, model, tokenizer, prompt_fun=None):
+     batch_size = args.batch_size
+     # dataset = load_dataset('oliverwang15/news_with_gpt_instructions')
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/news_with_gpt_instructions/')
+     dataset = dataset['test'].to_pandas()  # NOTE: split selection added; load_from_disk returns a DatasetDict with no 'label' column to index. Assumes a 'test' split.
+     dataset['output'] = dataset['label'].apply(lambda x: dic[x])
+ 
+     if prompt_fun is None:
+         dataset["instruction"] = "What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}."
+         # dataset["instruction"] = "What is the sentiment of this news? Please choose an answer from {strong negative/moderately negative/mildly negative/neutral/mildly positive/moderately positive/strong positive}."
+     else:
+         dataset["instruction"] = dataset.apply(prompt_fun, axis=1)
+     dataset["input"] = dataset["news"]
+ 
+     dataset = dataset[['input', 'output', 'instruction']]
+     dataset[["context", "target"]] = dataset.apply(format_example, axis=1, result_type="expand")
+ 
+     # print example
+     print(f"\n\nPrompt example:\n{dataset['context'][0]}\n\n")
+ 
+     context = dataset['context'].tolist()
+ 
+     total_steps = (dataset.shape[0] + batch_size - 1) // batch_size  # ceiling division avoids an empty final batch
+     print(f"Total len: {len(context)}. Batchsize: {batch_size}. Total steps: {total_steps}")
+ 
+     out_text_list = []
+     for i in tqdm(range(total_steps)):
+         tmp_context = context[i * batch_size:(i + 1) * batch_size]
+         tokens = tokenizer(tmp_context, return_tensors='pt', padding=True, max_length=512, return_token_type_ids=False)
+         # tokens.pop('token_type_ids')
+         for k in tokens.keys():
+             tokens[k] = tokens[k].cuda()
+         res = model.generate(**tokens, max_length=512, eos_token_id=tokenizer.eos_token_id)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         out_text = [o.split("Answer: ")[1] for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+ 
+     dataset["out_text"] = out_text_list
+     dataset["new_target"] = dataset["target"].apply(change_target)
+     dataset["new_out"] = dataset["out_text"].apply(change_target)
+ 
+     acc = accuracy_score(dataset["new_target"], dataset["new_out"])
+     f1_macro = f1_score(dataset["new_target"], dataset["new_out"], average="macro")
+     f1_micro = f1_score(dataset["new_target"], dataset["new_out"], average="micro")
+     f1_weighted = f1_score(dataset["new_target"], dataset["new_out"], average="weighted")
+ 
+     print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}. ")
+ 
+     return dataset
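The dic mapping at the top of nwgi.py deliberately coarsens the dataset's seven GPT-assigned intensity labels into the three classes shared by the other sentiment benchmarks, folding the 'mildly' grades into neutral so weak signals do not count as calls. A quick check of that collapse:

    seven_to_three = {
        'strong negative': 'negative', 'moderately negative': 'negative',
        'mildly negative': 'neutral', 'neutral': 'neutral', 'mildly positive': 'neutral',
        'moderately positive': 'positive', 'strong positive': 'positive',
    }
    print(seven_to_three['mildly negative'])  # -> neutral
    print(seven_to_three['strong positive'])  # -> positive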
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/sentiment_templates.txt ADDED
@@ -0,0 +1,5 @@
+ What is the sentiment of the input {type} from financial perspective?
+ Assign a sentiment category to this {type} related to finance.
+ Categorize the input {type}'s emotional tone into one of three groups.
+ Determine the sentiment expressed in the {type} from financial perspective.
+ Characterize the {type}'s sentiment using the following options.
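Each line above is one instruction paraphrase with a {type} slot; test_fpb_mlt fills the slot (always 'news' for FPB) and appends the answer options, producing five variants of the same question per example. For instance:

    template = "Assign a sentiment category to this {type} related to finance."
    instruction = template.format(type='news') + "\nOptions: positive, negative"
    print(instruction)
    # Assign a sentiment category to this news related to finance.
    # Options: positive, negative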
external/FinGPT/fingpt/FinGPT_Benchmark/benchmarks/tfns.py ADDED
@@ -0,0 +1,82 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+ 
+ from sklearn.metrics import accuracy_score, f1_score
+ from datasets import load_dataset, load_from_disk
+ from tqdm import tqdm
+ import datasets
+ import torch
+ from pathlib import Path
+ 
+ dic = {
+     0: "negative",
+     1: 'positive',
+     2: 'neutral',
+ }
+ 
+ def format_example(example: dict) -> dict:
+     context = f"Instruction: {example['instruction']}\n"
+     if example.get("input"):
+         context += f"Input: {example['input']}\n"
+     context += "Answer: "
+     target = example["output"]
+     return {"context": context, "target": target}
+ 
+ def change_target(x):
+     if 'positive' in x or 'Positive' in x:
+         return 'positive'
+     elif 'negative' in x or 'Negative' in x:
+         return 'negative'
+     else:
+         return 'neutral'
+ 
+ def test_tfns(args, model, tokenizer, prompt_fun=None):
+     batch_size = args.batch_size
+     # dataset = load_dataset('zeroshot/twitter-financial-news-sentiment')
+     dataset = load_from_disk(Path(__file__).parent.parent / 'data/twitter-financial-news-sentiment')
+     dataset = dataset['validation']
+     dataset = dataset.to_pandas()
+     dataset['label'] = dataset['label'].apply(lambda x: dic[x])
+ 
+     if prompt_fun is None:
+         dataset["instruction"] = 'What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}.'
+     else:
+         dataset["instruction"] = dataset.apply(prompt_fun, axis=1)
+ 
+     dataset.columns = ['input', 'output', 'instruction']
+     dataset[["context", "target"]] = dataset.apply(format_example, axis=1, result_type="expand")
+ 
+     # print example
+     print(f"\n\nPrompt example:\n{dataset['context'][0]}\n\n")
+ 
+     context = dataset['context'].tolist()
+ 
+     total_steps = (dataset.shape[0] + batch_size - 1) // batch_size  # ceiling division avoids an empty final batch
+     print(f"Total len: {len(context)}. Batchsize: {batch_size}. Total steps: {total_steps}")
+ 
+ 
+     out_text_list = []
+     for i in tqdm(range(total_steps)):
+         tmp_context = context[i * batch_size:(i + 1) * batch_size]
+         tokens = tokenizer(tmp_context, return_tensors='pt', padding=True, max_length=512, return_token_type_ids=False)
+         # tokens.pop('token_type_ids')
+         for k in tokens.keys():
+             tokens[k] = tokens[k].cuda()
+         res = model.generate(**tokens, max_length=512, eos_token_id=tokenizer.eos_token_id)
+         res_sentences = [tokenizer.decode(i, skip_special_tokens=True) for i in res]
+         out_text = [o.split("Answer: ")[1] for o in res_sentences]
+         out_text_list += out_text
+         torch.cuda.empty_cache()
+ 
+     dataset["out_text"] = out_text_list
+     dataset["new_target"] = dataset["target"].apply(change_target)
+     dataset["new_out"] = dataset["out_text"].apply(change_target)
+ 
+     acc = accuracy_score(dataset["new_target"], dataset["new_out"])
+     f1_macro = f1_score(dataset["new_target"], dataset["new_out"], average="macro")
+     f1_micro = f1_score(dataset["new_target"], dataset["new_out"], average="micro")
+     f1_weighted = f1_score(dataset["new_target"], dataset["new_out"], average="weighted")
+ 
+     print(f"Acc: {acc}. F1 macro: {f1_macro}. F1 micro: {f1_micro}. F1 weighted (BloombergGPT): {f1_weighted}. ")
+ 
+     return dataset
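The tokenize-generate-split loop above is repeated almost verbatim in fpb.py and nwgi.py; only the dataset wiring differs. A hedged refactor sketch that factors the shared loop into one helper (batch_generate is a name introduced here, not part of the repository):

    import torch
    from tqdm import tqdm

    def batch_generate(model, tokenizer, contexts, batch_size=8, max_length=512):
        """Generate answers for a list of prompts; return the text after 'Answer: '."""
        out_texts = []
        # Ceiling division so the final partial batch is included and never empty.
        total_steps = (len(contexts) + batch_size - 1) // batch_size
        for i in tqdm(range(total_steps)):
            batch = contexts[i * batch_size:(i + 1) * batch_size]
            tokens = tokenizer(batch, return_tensors='pt', padding=True,
                               return_token_type_ids=False).to(model.device)
            res = model.generate(**tokens, max_length=max_length,
                                 eos_token_id=tokenizer.eos_token_id)
            decoded = [tokenizer.decode(r, skip_special_tokens=True) for r in res]
            out_texts += [d.split("Answer: ")[1] for d in decoded]
            torch.cuda.empty_cache()
        return out_texts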
external/FinGPT/fingpt/FinGPT_Benchmark/config.json ADDED
@@ -0,0 +1,33 @@
+ {
+     "train_micro_batch_size_per_gpu": "auto",
+     "train_batch_size": "auto",
+     "gradient_accumulation_steps": "auto",
+     "optimizer": {
+         "type": "ZeroOneAdam",
+         "params": {
+             "lr": "auto",
+             "weight_decay": "auto",
+             "bias_correction": false,
+             "var_freeze_step": 1000,
+             "var_update_scaler": 16,
+             "local_step_scaler": 1000,
+             "local_step_clipper": 16,
+             "cuda_aware": true,
+             "comm_backend_name": "nccl"
+         }
+     },
+     "scheduler": {
+         "type": "WarmupLR",
+         "params": {
+             "warmup_min_lr": 0,
+             "warmup_max_lr": "auto",
+             "warmup_num_steps": "auto"
+         }
+     },
+     "fp16": {
+         "enabled": true
+     },
+     "zero_optimization": {
+         "stage": 0
+     }
+ }
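The "auto" placeholders in this config (and in config_hf.json and config_new.json below) are resolved by the Hugging Face Trainer's DeepSpeed integration from the matching TrainingArguments at launch time. A minimal sketch of that wiring, assuming the config file sits next to the training script (model and data setup omitted):

    from transformers import TrainingArguments

    # "auto" entries in the DeepSpeed config are filled from these values.
    args = TrainingArguments(
        output_dir="out",
        per_device_train_batch_size=4,   # -> train_micro_batch_size_per_gpu
        gradient_accumulation_steps=8,   # -> gradient_accumulation_steps
        learning_rate=1e-4,              # -> optimizer lr / warmup_max_lr
        warmup_steps=100,                # -> warmup_num_steps
        fp16=True,                       # must agree with the config's fp16 block
        deepspeed="config.json",         # path to the file above
    )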
external/FinGPT/fingpt/FinGPT_Benchmark/config_hf.json ADDED
@@ -0,0 +1,11 @@
+ {
+     "train_micro_batch_size_per_gpu": "auto",
+     "train_batch_size": "auto",
+     "gradient_accumulation_steps": "auto",
+     "fp16": {
+         "enabled": true
+     },
+     "zero_optimization": {
+         "stage": 0
+     }
+ }
external/FinGPT/fingpt/FinGPT_Benchmark/config_new.json ADDED
@@ -0,0 +1,35 @@
+ {
+     "train_micro_batch_size_per_gpu": "auto",
+     "train_batch_size": "auto",
+     "gradient_accumulation_steps": "auto",
+     "optimizer": {
+         "type": "AdamW",
+         "params": {
+             "lr": "auto",
+             "weight_decay": "auto",
+             "betas": "auto",
+             "eps": "auto"
+         }
+     },
+     "scheduler": {
+         "type": "WarmupDecayLR",
+         "params": {
+             "last_batch_iteration": -1,
+             "total_num_steps": "auto",
+             "warmup_min_lr": "auto",
+             "warmup_max_lr": "auto",
+             "warmup_num_steps": "auto"
+         }
+     },
+     "fp16": {
+         "enabled": true,
+         "loss_scale": 0,
+         "loss_scale_window": 1000,
+         "initial_scale_power": 16,
+         "hysteresis": 2,
+         "min_loss_scale": 1
+     },
+     "zero_optimization": {
+         "stage": 0
+     }
+ }
external/FinGPT/fingpt/FinGPT_Benchmark/data/__init__.py ADDED
File without changes
external/FinGPT/fingpt/FinGPT_Benchmark/data/download.py ADDED
@@ -0,0 +1,41 @@
+ import datasets
+ from pathlib import Path
+ import argparse
+ 
+ DATASETS = [
+     # source, destination
+     (('pauri32/fiqa-2018', None), 'fiqa-2018'),
+     (('FinGPT/fingpt-finred', None), 'fingpt-finred'),
+     (('zeroshot/twitter-financial-news-sentiment', None), 'twitter-financial-news-sentiment'),
+     (('oliverwang15/news_with_gpt_instructions', None), 'news_with_gpt_instructions'),
+     (('financial_phrasebank', 'sentences_50agree'), 'financial_phrasebank-sentences_50agree'),
+     (('FinGPT/fingpt-fiqa_qa', None), 'fingpt-fiqa_qa'),
+     (('FinGPT/fingpt-headline-cls', None), 'fingpt-headline-cls'),
+     (('FinGPT/fingpt-finred', None), 'fingpt-finred'),  # duplicate of the entry above; harmless, the cache check skips it
+     (('FinGPT/fingpt-convfinqa', None), 'fingpt-convfinqa'),
+     (('FinGPT/fingpt-finred-cls', None), 'fingpt-finred-cls'),
+     (('FinGPT/fingpt-ner', None), 'fingpt-ner'),
+     (('FinGPT/fingpt-headline', None), 'fingpt-headline-instruct'),
+     (('FinGPT/fingpt-finred-re', None), 'fingpt-finred-re'),
+     (('FinGPT/fingpt-ner-cls', None), 'fingpt-ner-cls'),
+     (('FinGPT/fingpt-fineval', None), 'fingpt-fineval'),
+     (('FinGPT/fingpt-sentiment-cls', None), 'fingpt-sentiment-cls'),
+ ]
+ 
+ def download(no_cache: bool = False):
+     """Downloads all datasets to where the FinGPT library is located."""
+     data_dir = Path(__file__).parent
+ 
+     for src, dest in DATASETS:
+         if Path(data_dir / dest).is_dir() and not no_cache:
+             print(f"Dataset found at {data_dir / dest}, skipping")
+             continue
+         dataset = datasets.load_dataset(*src)
+         dataset.save_to_disk(data_dir / dest)
+ 
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--no_cache", action="store_true", help="Redownload all datasets even if a cached copy exists")  # was type=str, which made any value (even "False") truthy
+ 
+     args = parser.parse_args()
+     download(no_cache=args.no_cache)
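With the store_true fix above the flag behaves as expected; a usage sketch, assuming FinGPT_Benchmark is the working directory:

    # Download any missing benchmark datasets:
    #   python data/download.py
    # Force a fresh download of everything:
    #   python data/download.py --no_cache
    from data.download import download  # data/__init__.py makes this importable
    download(no_cache=False)            # skips datasets already saved to disk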
external/FinGPT/fingpt/FinGPT_Benchmark/data/prepare_data.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
external/FinGPT/fingpt/FinGPT_Benchmark/demo.ipynb ADDED
@@ -0,0 +1,715 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Read before you start:\n",
8
+ "\n",
9
+ "This notebook gives a test demo for all the LLMs we trained during phase2: Multi-Task Instruction Tuning.\n",
10
+ "\n",
11
+ "- LLMs: Llama2, Falcon, BLOOM, ChatGLM2, Qwen, MPT\n",
12
+ "- Tasks: Sentiment Analysis, Headline Classification, Named Entity Extraction, Relation Extraction\n",
13
+ "\n",
14
+ "All the models & instruction data samples used are also available in our huggingface organization. [https://huggingface.co/FinGPT]\n",
15
+ "\n",
16
+ "Models trained in phase1&3 are not provided, as MT-models cover most of their capacity. Feel free to train your own models based on the tasks you want.\n",
17
+ "\n",
18
+ "Due to the limited diversity of the financial tasks and datasets we used, models might not response correctly to out-of-scope instructions. We'll delve into the generalization ability more in our future works."
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 1,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "# First choose to load data/model from huggingface or local space\n",
28
+ "\n",
29
+ "FROM_REMOTE = False"
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": 2,
35
+ "metadata": {},
36
+ "outputs": [
37
+ {
38
+ "name": "stdout",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "[2023-10-15 20:44:54,084] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
42
+ ]
43
+ }
44
+ ],
45
+ "source": [
46
+ "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
47
+ "from peft import PeftModel\n",
48
+ "from utils import *"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 3,
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "import logging\n",
58
+ "# Suppress Warnings during inference\n",
59
+ "logging.getLogger(\"transformers\").setLevel(logging.ERROR)"
60
+ ]
61
+ },
62
+ {
63
+ "cell_type": "code",
64
+ "execution_count": 4,
65
+ "metadata": {},
66
+ "outputs": [],
67
+ "source": [
68
+ "demo_tasks = [\n",
69
+ " 'Financial Sentiment Analysis',\n",
70
+ " 'Financial Relation Extraction',\n",
71
+ " 'Financial Headline Classification',\n",
72
+ " 'Financial Named Entity Recognition',\n",
73
+ "]\n",
74
+ "demo_inputs = [\n",
75
+ " \"Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano\",\n",
76
+ " \"Wednesday, July 8, 2015 10:30AM IST (5:00AM GMT) Rimini Street Comment on Oracle Litigation Las Vegas, United States Rimini Street, Inc., the leading independent provider of enterprise software support for SAP AG’s (NYSE:SAP) Business Suite and BusinessObjects software and Oracle Corporation’s (NYSE:ORCL) Siebel , PeopleSoft , JD Edwards , E-Business Suite , Oracle Database , Hyperion and Oracle Retail software, today issued a statement on the Oracle litigation.\",\n",
77
+ " 'april gold down 20 cents to settle at $1,116.10/oz',\n",
78
+ " 'Subject to the terms and conditions of this Agreement , Bank agrees to lend to Borrower , from time to time prior to the Commitment Termination Date , equipment advances ( each an \" Equipment Advance \" and collectively the \" Equipment Advances \").',\n",
79
+ "]\n",
80
+ "demo_instructions = [\n",
81
+ " 'What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.',\n",
82
+ " 'Given phrases that describe the relationship between two words/phrases as options, extract the word/phrase pair and the corresponding lexical relationship between them from the input text. The output format should be \"relation1: word1, word2; relation2: word3, word4\". Options: product/material produced, manufacturer, distributed by, industry, position held, original broadcaster, owned by, founded by, distribution format, headquarters location, stock exchange, currency, parent organization, chief executive officer, director/manager, owner of, operator, member of, employer, chairperson, platform, subsidiary, legal form, publisher, developer, brand, business division, location of formation, creator.',\n",
83
+ " 'Does the news headline talk about price in the past? Please choose an answer from {Yes/No}.',\n",
84
+ " 'Please extract entities and their types from the input sentence, entity types should be chosen from {person/organization/location}.',\n",
85
+ "]"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 5,
91
+ "metadata": {},
92
+ "outputs": [],
93
+ "source": [
94
+ "def load_model(base_model, peft_model, from_remote=False):\n",
95
+ " \n",
96
+ " model_name = parse_model_name(base_model, from_remote)\n",
97
+ "\n",
98
+ " model = AutoModelForCausalLM.from_pretrained(\n",
99
+ " model_name, trust_remote_code=True, \n",
100
+ " device_map=\"auto\",\n",
101
+ " )\n",
102
+ " model.model_parallel = True\n",
103
+ "\n",
104
+ " tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
105
+ " \n",
106
+ " tokenizer.padding_side = \"left\"\n",
107
+ " if base_model == 'qwen':\n",
108
+ " tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids('<|endoftext|>')\n",
109
+ " tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids('<|extra_0|>')\n",
110
+ " if not tokenizer.pad_token or tokenizer.pad_token_id == tokenizer.eos_token_id:\n",
111
+ " tokenizer.add_special_tokens({'pad_token': '[PAD]'})\n",
112
+ " model.resize_token_embeddings(len(tokenizer))\n",
113
+ " \n",
114
+ " model = PeftModel.from_pretrained(model, peft_model)\n",
115
+ " model = model.eval()\n",
116
+ " return model, tokenizer\n",
117
+ "\n",
118
+ "\n",
119
+ "def test_demo(model, tokenizer):\n",
120
+ "\n",
121
+ " for task_name, input, instruction in zip(demo_tasks, demo_inputs, demo_instructions):\n",
122
+ " prompt = 'Instruction: {instruction}\\nInput: {input}\\nAnswer: '.format(\n",
123
+ " input=input, \n",
124
+ " instruction=instruction\n",
125
+ " )\n",
126
+ " inputs = tokenizer(\n",
127
+ " prompt, return_tensors='pt',\n",
128
+ " padding=True, max_length=512,\n",
129
+ " return_token_type_ids=False\n",
130
+ " )\n",
131
+ " inputs = {key: value.to(model.device) for key, value in inputs.items()}\n",
132
+ " res = model.generate(\n",
133
+ " **inputs, max_length=512, do_sample=False,\n",
134
+ " eos_token_id=tokenizer.eos_token_id\n",
135
+ " )\n",
136
+ " output = tokenizer.decode(res[0], skip_special_tokens=True)\n",
137
+ " print(f\"\\n==== {task_name} ====\\n\")\n",
138
+ " print(output)\n",
139
+ " "
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "markdown",
144
+ "metadata": {},
145
+ "source": [
146
+ "# Llama2-7B"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 6,
152
+ "metadata": {},
153
+ "outputs": [
154
+ {
155
+ "data": {
156
+ "application/json": {
157
+ "ascii": false,
158
+ "bar_format": null,
159
+ "colour": null,
160
+ "elapsed": 0.006228446960449219,
161
+ "initial": 0,
162
+ "n": 0,
163
+ "ncols": null,
164
+ "nrows": null,
165
+ "postfix": null,
166
+ "prefix": "Loading checkpoint shards",
167
+ "rate": null,
168
+ "total": 2,
169
+ "unit": "it",
170
+ "unit_divisor": 1000,
171
+ "unit_scale": false
172
+ },
173
+ "application/vnd.jupyter.widget-view+json": {
174
+ "model_id": "0d58aff745fb486780792c86384fe702",
175
+ "version_major": 2,
176
+ "version_minor": 0
177
+ },
178
+ "text/plain": [
179
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
180
+ ]
181
+ },
182
+ "metadata": {},
183
+ "output_type": "display_data"
184
+ },
185
+ {
186
+ "name": "stderr",
187
+ "output_type": "stream",
188
+ "text": [
189
+ "Using pad_token, but it is not set yet.\n",
190
+ "/root/.conda/envs/torch2/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:2436: UserWarning: `max_length` is ignored when `padding`=`True` and there is no truncation strategy. To pad to max length, use `padding='max_length'`.\n",
191
+ " warnings.warn(\n",
192
+ "/root/.conda/envs/torch2/lib/python3.9/site-packages/transformers/generation/configuration_utils.py:362: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
193
+ " warnings.warn(\n",
194
+ "/root/.conda/envs/torch2/lib/python3.9/site-packages/transformers/generation/configuration_utils.py:367: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
195
+ " warnings.warn(\n"
196
+ ]
197
+ },
198
+ {
199
+ "name": "stdout",
200
+ "output_type": "stream",
201
+ "text": [
202
+ "\n",
203
+ "==== Financial Sentiment Analysis ====\n",
204
+ "\n",
205
+ "Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.\n",
206
+ "Input: Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano\n",
207
+ "Answer: positive\n",
208
+ "\n",
209
+ "==== Financial Relation Extraction ====\n",
210
+ "\n",
211
+ "Instruction: Given phrases that describe the relationship between two words/phrases as options, extract the word/phrase pair and the corresponding lexical relationship between them from the input text. The output format should be \"relation1: word1, word2; relation2: word3, word4\". Options: product/material produced, manufacturer, distributed by, industry, position held, original broadcaster, owned by, founded by, distribution format, headquarters location, stock exchange, currency, parent organization, chief executive officer, director/manager, owner of, operator, member of, employer, chairperson, platform, subsidiary, legal form, publisher, developer, brand, business division, location of formation, creator.\n",
212
+ "Input: Wednesday, July 8, 2015 10:30AM IST (5:00AM GMT) Rimini Street Comment on Oracle Litigation Las Vegas, United States Rimini Street, Inc., the leading independent provider of enterprise software support for SAP AG’s (NYSE:SAP) Business Suite and BusinessObjects software and Oracle Corporation’s (NYSE:ORCL) Siebel , PeopleSoft , JD Edwards , E-Business Suite , Oracle Database , Hyperion and Oracle Retail software, today issued a statement on the Oracle litigation.\n",
213
+ "Answer: product_or_material_produced: PeopleSoft, software; parent_organization: Siebel, Oracle Corporation; industry: Oracle Corporation, software; product_or_material_produced: Oracle Corporation, software; product_or_material_produced: Oracle Corporation, software\n",
214
+ "\n",
215
+ "==== Financial Headline Classification ====\n",
216
+ "\n",
217
+ "Instruction: Does the news headline talk about price in the past? Please choose an answer from {Yes/No}.\n",
218
+ "Input: april gold down 20 cents to settle at $1,116.10/oz\n",
219
+ "Answer: Yes\n",
220
+ "\n",
221
+ "==== Financial Named Entity Recognition ====\n",
222
+ "\n",
223
+ "Instruction: Please extract entities and their types from the input sentence, entity types should be chosen from {person/organization/location}.\n",
224
+ "Input: Subject to the terms and conditions of this Agreement , Bank agrees to lend to Borrower , from time to time prior to the Commitment Termination Date , equipment advances ( each an \" Equipment Advance \" and collectively the \" Equipment Advances \").\n",
225
+ "Answer: Bank is an organization, Borrower is a person.\n"
226
+ ]
227
+ }
228
+ ],
229
+ "source": [
230
+ "base_model = 'llama2'\n",
231
+ "peft_model = 'FinGPT/fingpt-mt_llama2-7b_lora' if FROM_REMOTE else 'finetuned_models/MT-llama2-linear_202309241345'\n",
232
+ "\n",
233
+ "model, tokenizer = load_model(base_model, peft_model, FROM_REMOTE)\n",
234
+ "test_demo(model, tokenizer)"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "markdown",
239
+ "metadata": {},
240
+ "source": [
241
+ "# Qwen-7B"
242
+ ]
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": 7,
247
+ "metadata": {},
248
+ "outputs": [
249
+ {
250
+ "name": "stderr",
251
+ "output_type": "stream",
252
+ "text": [
253
+ "The model is automatically converting to bf16 for faster inference. If you want to disable the automatic precision, please manually add bf16/fp16/fp32=True to \"AutoModelForCausalLM.from_pretrained\".\n",
254
+ "Try importing flash-attention for faster inference...\n",
255
+ "Warning: import flash_attn rotary fail, please install FlashAttention rotary to get higher efficiency https://github.com/Dao-AILab/flash-attention/tree/main/csrc/rotary\n",
256
+ "Warning: import flash_attn rms_norm fail, please install FlashAttention layer_norm to get higher efficiency https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_norm\n",
257
+ "Warning: import flash_attn fail, please install FlashAttention to get higher efficiency https://github.com/Dao-AILab/flash-attention\n"
258
+ ]
259
+ },
260
+ {
261
+ "data": {
262
+ "application/json": {
263
+ "ascii": false,
264
+ "bar_format": null,
265
+ "colour": null,
266
+ "elapsed": 0.004647493362426758,
267
+ "initial": 0,
268
+ "n": 0,
269
+ "ncols": null,
270
+ "nrows": null,
271
+ "postfix": null,
272
+ "prefix": "Loading checkpoint shards",
273
+ "rate": null,
274
+ "total": 8,
275
+ "unit": "it",
276
+ "unit_divisor": 1000,
277
+ "unit_scale": false
278
+ },
279
+ "application/vnd.jupyter.widget-view+json": {
280
+ "model_id": "e1978e69ea784778acd1813cc0647c3e",
281
+ "version_major": 2,
282
+ "version_minor": 0
283
+ },
284
+ "text/plain": [
285
+ "Loading checkpoint shards: 0%| | 0/8 [00:00<?, ?it/s]"
286
+ ]
287
+ },
288
+ "metadata": {},
289
+ "output_type": "display_data"
290
+ },
291
+ {
292
+ "name": "stderr",
293
+ "output_type": "stream",
294
+ "text": [
295
+ "/root/.conda/envs/torch2/lib/python3.9/site-packages/transformers/generation/configuration_utils.py:367: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.8` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n",
296
+ " warnings.warn(\n",
297
+ "/root/.conda/envs/torch2/lib/python3.9/site-packages/transformers/generation/configuration_utils.py:377: UserWarning: `do_sample` is set to `False`. However, `top_k` is set to `0` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_k`.\n",
298
+ " warnings.warn(\n"
299
+ ]
300
+ },
301
+ {
302
+ "name": "stdout",
303
+ "output_type": "stream",
304
+ "text": [
305
+ "\n",
306
+ "==== Financial Sentiment Analysis ====\n",
307
+ "\n",
308
+ "Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.\n",
309
+ "Input: Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano\n",
310
+ "Answer: positive\n",
311
+ "\n",
312
+ "==== Financial Relation Extraction ====\n",
313
+ "\n",
314
+ "Instruction: Given phrases that describe the relationship between two words/phrases as options, extract the word/phrase pair and the corresponding lexical relationship between them from the input text. The output format should be \"relation1: word1, word2; relation2: word3, word4\". Options: product/material produced, manufacturer, distributed by, industry, position held, original broadcaster, owned by, founded by, distribution format, headquarters location, stock exchange, currency, parent organization, chief executive officer, director/manager, owner of, operator, member of, employer, chairperson, platform, subsidiary, legal form, publisher, developer, brand, business division, location of formation, creator.\n",
315
+ "Input: Wednesday, July 8, 2015 10:30AM IST (5:00AM GMT) Rimini Street Comment on Oracle Litigation Las Vegas, United States Rimini Street, Inc., the leading independent provider of enterprise software support for SAP AG’s (NYSE:SAP) Business Suite and BusinessObjects software and Oracle Corporation’s (NYSE:ORCL) Siebel , PeopleSoft , JD Edwards , E-Business Suite , Oracle Database , Hyperion and Oracle Retail software, today issued a statement on the Oracle litigation.\n",
316
+ "Answer: subsidiary: PeopleSoft, JD Edwards\n",
317
+ "\n",
318
+ "==== Financial Headline Classification ====\n",
319
+ "\n",
320
+ "Instruction: Does the news headline talk about price in the past? Please choose an answer from {Yes/No}.\n",
321
+ "Input: april gold down 20 cents to settle at $1,116.10/oz\n",
322
+ "Answer: Yes\n",
323
+ "\n",
324
+ "==== Financial Named Entity Recognition ====\n",
325
+ "\n",
326
+ "Instruction: Please extract entities and their types from the input sentence, entity types should be chosen from {person/organization/location}.\n",
327
+ "Input: Subject to the terms and conditions of this Agreement , Bank agrees to lend to Borrower , from time to time prior to the Commitment Termination Date , equipment advances ( each an \" Equipment Advance \" and collectively the \" Equipment Advances \").\n",
328
+ "Answer: Bank is an organization, Borrower is a person.\n"
329
+ ]
330
+ }
331
+ ],
332
+ "source": [
333
+ "base_model = 'qwen'\n",
334
+ "peft_model = 'FinGPT/fingpt-mt_qwen-7b_lora' if FROM_REMOTE else 'finetuned_models/MT-qwen-linear_202309221011'\n",
335
+ "\n",
336
+ "model, tokenizer = load_model(base_model, peft_model, FROM_REMOTE)\n",
337
+ "test_demo(model, tokenizer)"
338
+ ]
339
+ },
340
+ {
341
+ "cell_type": "markdown",
342
+ "metadata": {},
343
+ "source": [
344
+ "# Falcon-7B"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 8,
350
+ "metadata": {},
351
+ "outputs": [
352
+ {
353
+ "data": {
354
+ "application/json": {
355
+ "ascii": false,
356
+ "bar_format": null,
357
+ "colour": null,
358
+ "elapsed": 0.004422426223754883,
359
+ "initial": 0,
360
+ "n": 0,
361
+ "ncols": null,
362
+ "nrows": null,
363
+ "postfix": null,
364
+ "prefix": "Loading checkpoint shards",
365
+ "rate": null,
366
+ "total": 2,
367
+ "unit": "it",
368
+ "unit_divisor": 1000,
369
+ "unit_scale": false
370
+ },
371
+ "application/vnd.jupyter.widget-view+json": {
372
+ "model_id": "e12fadfbaa6048538bbeef26ed563b28",
373
+ "version_major": 2,
374
+ "version_minor": 0
375
+ },
376
+ "text/plain": [
377
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
378
+ ]
379
+ },
380
+ "metadata": {},
381
+ "output_type": "display_data"
382
+ },
383
+ {
384
+ "name": "stderr",
385
+ "output_type": "stream",
386
+ "text": [
387
+ "Using pad_token, but it is not set yet.\n",
388
+ "/root/.conda/envs/torch2/lib/python3.9/site-packages/transformers/generation/utils.py:1411: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation )\n",
389
+ " warnings.warn(\n"
390
+ ]
391
+ },
392
+ {
393
+ "name": "stdout",
394
+ "output_type": "stream",
395
+ "text": [
396
+ "\n",
397
+ "==== Financial Sentiment Analysis ====\n",
398
+ "\n",
399
+ "Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.\n",
400
+ "Input: Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano\n",
401
+ "Answer: positive\n",
402
+ "\n",
403
+ "==== Financial Relation Extraction ====\n",
404
+ "\n",
405
+ "Instruction: Given phrases that describe the relationship between two words/phrases as options, extract the word/phrase pair and the corresponding lexical relationship between them from the input text. The output format should be \"relation1: word1, word2; relation2: word3, word4\". Options: product/material produced, manufacturer, distributed by, industry, position held, original broadcaster, owned by, founded by, distribution format, headquarters location, stock exchange, currency, parent organization, chief executive officer, director/manager, owner of, operator, member of, employer, chairperson, platform, subsidiary, legal form, publisher, developer, brand, business division, location of formation, creator.\n",
406
+ "Input: Wednesday, July 8, 2015 10:30AM IST (5:00AM GMT) Rimini Street Comment on Oracle Litigation Las Vegas, United States Rimini Street, Inc., the leading independent provider of enterprise software support for SAP AG’s (NYSE:SAP) Business Suite and BusinessObjects software and Oracle Corporation’s (NYSE:ORCL) Siebel, PeopleSoft, JD Edwards, E-Business Suite, Oracle Database, Hyperion and Oracle Retail software, today issued a statement on the Oracle litigation.\n",
407
+ "Answer: product_or_material_produced: PeopleSoft, Oracle Database\n",
408
+ "\n",
409
+ "==== Financial Headline Classification ====\n",
410
+ "\n",
411
+ "Instruction: Does the news headline talk about price in the past? Please choose an answer from {Yes/No}.\n",
412
+ "Input: april gold down 20 cents to settle at $1,116.10/oz\n",
413
+ "Answer: Yes\n",
414
+ "\n",
415
+ "==== Financial Named Entity Recognition ====\n",
416
+ "\n",
417
+ "Instruction: Please extract entities and their types from the input sentence, entity types should be chosen from {person/organization/location}.\n",
418
+ "Input: Subject to the terms and conditions of this Agreement, Bank agrees to lend to Borrower, from time to time prior to the Commitment Termination Date, equipment advances ( each an \" Equipment Advance \" and collectively the \" Equipment Advances \").\n",
419
+ "Answer: Bank is an organization, Borrower is a person.\n"
420
+ ]
421
+ }
422
+ ],
423
+ "source": [
424
+ "base_model = 'falcon'\n",
425
+ "peft_model = 'FinGPT/fingpt-mt_falcon-7b_lora' if FROM_REMOTE else 'finetuned_models/MT-falcon-linear_202309210126'\n",
426
+ "\n",
427
+ "model, tokenizer = load_model(base_model, peft_model, FROM_REMOTE)\n",
428
+ "test_demo(model, tokenizer)"
429
+ ]
430
+ },
431
+ {
432
+ "cell_type": "markdown",
433
+ "metadata": {},
434
+ "source": [
435
+ "# ChatGLM2-6B"
436
+ ]
437
+ },
438
+ {
439
+ "cell_type": "code",
440
+ "execution_count": 9,
441
+ "metadata": {},
442
+ "outputs": [
443
+ {
444
+ "data": {
445
+ "application/json": {
446
+ "ascii": false,
447
+ "bar_format": null,
448
+ "colour": null,
449
+ "elapsed": 0.004460573196411133,
450
+ "initial": 0,
451
+ "n": 0,
452
+ "ncols": null,
453
+ "nrows": null,
454
+ "postfix": null,
455
+ "prefix": "Loading checkpoint shards",
456
+ "rate": null,
457
+ "total": 7,
458
+ "unit": "it",
459
+ "unit_divisor": 1000,
460
+ "unit_scale": false
461
+ },
462
+ "application/vnd.jupyter.widget-view+json": {
463
+ "model_id": "8bddd025a6514946b5f07f55e9c38f58",
464
+ "version_major": 2,
465
+ "version_minor": 0
466
+ },
467
+ "text/plain": [
468
+ "Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]"
469
+ ]
470
+ },
471
+ "metadata": {},
472
+ "output_type": "display_data"
473
+ },
474
+ {
475
+ "name": "stdout",
476
+ "output_type": "stream",
477
+ "text": [
478
+ "\n",
479
+ "==== Financial Sentiment Analysis ====\n",
480
+ "\n",
481
+ "Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.\n",
482
+ "Input: Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano\n",
483
+ "Answer: positive\n",
484
+ "\n",
485
+ "==== Financial Relation Extraction ====\n",
486
+ "\n",
487
+ "Instruction: Given phrases that describe the relationship between two words/phrases as options, extract the word/phrase pair and the corresponding lexical relationship between them from the input text. The output format should be \"relation1: word1, word2; relation2: word3, word4\". Options: product/material produced, manufacturer, distributed by, industry, position held, original broadcaster, owned by, founded by, distribution format, headquarters location, stock exchange, currency, parent organization, chief executive officer, director/manager, owner of, operator, member of, employer, chairperson, platform, subsidiary, legal form, publisher, developer, brand, business division, location of formation, creator.\n",
488
+ "Input: Wednesday, July 8, 2015 10:30AM IST (5:00AM GMT) Rimini Street Comment on Oracle Litigation Las Vegas, United States Rimini Street, Inc., the leading independent provider of enterprise software support for SAP AG’s (NYSE:SAP) Business Suite and BusinessObjects software and Oracle Corporation’s (NYSE:ORCL) Siebel , PeopleSoft , JD Edwards , E-Business Suite , Oracle Database , Hyperion and Oracle Retail software, today issued a statement on the Oracle litigation.\n",
489
+ "Answer: product_or_material_produced: Oracle, Oracle Database; developer: Oracle, Oracle; product_or_material_produced: Oracle, Oracle Database\n",
490
+ "\n",
491
+ "==== Financial Headline Classification ====\n",
492
+ "\n",
493
+ "Instruction: Does the news headline talk about price in the past? Please choose an answer from {Yes/No}.\n",
494
+ "Input: april gold down 20 cents to settle at $1,116.10/oz\n",
495
+ "Answer: Yes\n",
496
+ "\n",
497
+ "==== Financial Named Entity Recognition ====\n",
498
+ "\n",
499
+ "Instruction: Please extract entities and their types from the input sentence, entity types should be chosen from {person/organization/location}.\n",
500
+ "Input: Subject to the terms and conditions of this Agreement , Bank agrees to lend to Borrower , from time to time prior to the Commitment Termination Date , equipment advances ( each an \" Equipment Advance \" and collectively the \" Equipment Advances \").\n",
501
+ "Answer: Bank is an organization, Borrower is a person.\n"
502
+ ]
503
+ }
504
+ ],
505
+ "source": [
506
+ "base_model = 'chatglm2'\n",
507
+ "peft_model = 'FinGPT/fingpt-mt_chatglm2-6b_lora' if FROM_REMOTE else 'finetuned_models/MT-chatglm2-linear_202309201120'\n",
508
+ "\n",
509
+ "model, tokenizer = load_model(base_model, peft_model, FROM_REMOTE)\n",
510
+ "test_demo(model, tokenizer)"
511
+ ]
512
+ },
513
+ {
514
+ "cell_type": "markdown",
515
+ "metadata": {},
516
+ "source": [
517
+ "# BLOOM-7B1"
518
+ ]
519
+ },
520
+ {
521
+ "cell_type": "code",
522
+ "execution_count": 10,
523
+ "metadata": {},
524
+ "outputs": [
525
+ {
526
+ "data": {
527
+ "application/json": {
528
+ "ascii": false,
529
+ "bar_format": null,
530
+ "colour": null,
531
+ "elapsed": 0.004486799240112305,
532
+ "initial": 0,
533
+ "n": 0,
534
+ "ncols": null,
535
+ "nrows": null,
536
+ "postfix": null,
537
+ "prefix": "Loading checkpoint shards",
538
+ "rate": null,
539
+ "total": 2,
540
+ "unit": "it",
541
+ "unit_divisor": 1000,
542
+ "unit_scale": false
543
+ },
544
+ "application/vnd.jupyter.widget-view+json": {
545
+ "model_id": "32ee0b5e2df049a0b9e458c779e09a68",
546
+ "version_major": 2,
547
+ "version_minor": 0
548
+ },
549
+ "text/plain": [
550
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
551
+ ]
552
+ },
553
+ "metadata": {},
554
+ "output_type": "display_data"
555
+ },
556
+ {
557
+ "name": "stdout",
558
+ "output_type": "stream",
559
+ "text": [
560
+ "\n",
561
+ "==== Financial Sentiment Analysis ====\n",
562
+ "\n",
563
+ "Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.\n",
564
+ "Input: Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano\n",
565
+ "Answer: positive\n",
566
+ "\n",
567
+ "==== Financial Relation Extraction ====\n",
568
+ "\n",
569
+ "Instruction: Given phrases that describe the relationship between two words/phrases as options, extract the word/phrase pair and the corresponding lexical relationship between them from the input text. The output format should be \"relation1: word1, word2; relation2: word3, word4\". Options: product/material produced, manufacturer, distributed by, industry, position held, original broadcaster, owned by, founded by, distribution format, headquarters location, stock exchange, currency, parent organization, chief executive officer, director/manager, owner of, operator, member of, employer, chairperson, platform, subsidiary, legal form, publisher, developer, brand, business division, location of formation, creator.\n",
570
+ "Input: Wednesday, July 8, 2015 10:30AM IST (5:00AM GMT) Rimini Street Comment on Oracle Litigation Las Vegas, United States Rimini Street, Inc., the leading independent provider of enterprise software support for SAP AG’s (NYSE:SAP) Business Suite and BusinessObjects software and Oracle Corporation’s (NYSE:ORCL) Siebel , PeopleSoft , JD Edwards , E-Business Suite , Oracle Database , Hyperion and Oracle Retail software, today issued a statement on the Oracle litigation.\n",
571
+ "Answer: product_or_material_produced: software provider, Software\n",
572
+ "\n",
573
+ "==== Financial Headline Classification ====\n",
574
+ "\n",
575
+ "Instruction: Does the news headline talk about price in the past? Please choose an answer from {Yes/No}.\n",
576
+ "Input: april gold down 20 cents to settle at $1,116.10/oz\n",
577
+ "Answer: Yes\n",
578
+ "\n",
579
+ "==== Financial Named Entity Recognition ====\n",
580
+ "\n",
581
+ "Instruction: Please extract entities and their types from the input sentence, entity types should be chosen from {person/organization/location}.\n",
582
+ "Input: Subject to the terms and conditions of this Agreement , Bank agrees to lend to Borrower , from time to time prior to the Commitment Termination Date , equipment advances ( each an \" Equipment Advance \" and collectively the \" Equipment Advances \").\n",
583
+ "Answer: Bank is an organization, Borrower is a person.\n"
584
+ ]
585
+ }
586
+ ],
587
+ "source": [
588
+ "base_model = 'bloom'\n",
589
+ "peft_model = 'FinGPT/fingpt-mt_bloom-7b1_lora' if FROM_REMOTE else 'finetuned_models/MT-bloom-linear_202309211510'\n",
590
+ "\n",
591
+ "model, tokenizer = load_model(base_model, peft_model, FROM_REMOTE)\n",
592
+ "test_demo(model, tokenizer)"
593
+ ]
594
+ },
595
+ {
596
+ "cell_type": "markdown",
597
+ "metadata": {},
598
+ "source": [
599
+ "# MPT-7B"
600
+ ]
601
+ },
602
+ {
603
+ "cell_type": "code",
604
+ "execution_count": 11,
605
+ "metadata": {},
606
+ "outputs": [
607
+ {
608
+ "name": "stderr",
609
+ "output_type": "stream",
610
+ "text": [
611
+ "/root/.cache/huggingface/modules/transformers_modules/mpt-7b-peft-compatible/attention.py:148: UserWarning: Using `attn_impl: torch`. If your model does not use `alibi` or `prefix_lm` we recommend using `attn_impl: flash` otherwise we recommend using `attn_impl: triton`.\n",
612
+ " warnings.warn('Using `attn_impl: torch`. If your model does not use `alibi` or ' + '`prefix_lm` we recommend using `attn_impl: flash` otherwise ' + 'we recommend using `attn_impl: triton`.')\n",
613
+ "The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n"
614
+ ]
615
+ },
616
+ {
617
+ "data": {
618
+ "application/json": {
619
+ "ascii": false,
620
+ "bar_format": null,
621
+ "colour": null,
622
+ "elapsed": 0.004449605941772461,
623
+ "initial": 0,
624
+ "n": 0,
625
+ "ncols": null,
626
+ "nrows": null,
627
+ "postfix": null,
628
+ "prefix": "Loading checkpoint shards",
629
+ "rate": null,
630
+ "total": 2,
631
+ "unit": "it",
632
+ "unit_divisor": 1000,
633
+ "unit_scale": false
634
+ },
635
+ "application/vnd.jupyter.widget-view+json": {
636
+ "model_id": "0440bc96112344c493c8a1f5dd76f319",
637
+ "version_major": 2,
638
+ "version_minor": 0
639
+ },
640
+ "text/plain": [
641
+ "Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
642
+ ]
643
+ },
644
+ "metadata": {},
645
+ "output_type": "display_data"
646
+ },
647
+ {
648
+ "name": "stderr",
649
+ "output_type": "stream",
650
+ "text": [
651
+ "Using pad_token, but it is not set yet.\n"
652
+ ]
653
+ },
654
+ {
655
+ "name": "stdout",
656
+ "output_type": "stream",
657
+ "text": [
658
+ "\n",
659
+ "==== Financial Sentiment Analysis ====\n",
660
+ "\n",
661
+ "Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.\n",
662
+ "Input: Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano\n",
663
+ "Answer: positive\n",
664
+ "\n",
665
+ "==== Financial Relation Extraction ====\n",
666
+ "\n",
667
+ "Instruction: Given phrases that describe the relationship between two words/phrases as options, extract the word/phrase pair and the corresponding lexical relationship between them from the input text. The output format should be \"relation1: word1, word2; relation2: word3, word4\". Options: product/material produced, manufacturer, distributed by, industry, position held, original broadcaster, owned by, founded by, distribution format, headquarters location, stock exchange, currency, parent organization, chief executive officer, director/manager, owner of, operator, member of, employer, chairperson, platform, subsidiary, legal form, publisher, developer, brand, business division, location of formation, creator.\n",
668
+ "Input: Wednesday, July 8, 2015 10:30AM IST (5:00AM GMT) Rimini Street Comment on Oracle Litigation Las Vegas, United States Rimini Street, Inc., the leading independent provider of enterprise software support for SAP AG’s (NYSE:SAP) Business Suite and BusinessObjects software and Oracle Corporation’s (NYSE:ORCL) Siebel, PeopleSoft, JD Edwards, E-Business Suite, Oracle Database, Hyperion and Oracle Retail software, today issued a statement on the Oracle litigation.\n",
669
+ "Answer: product_or_material_produced: Hyperion, software\n",
670
+ "\n",
671
+ "==== Financial Headline Classification ====\n",
672
+ "\n",
673
+ "Instruction: Does the news headline talk about price in the past? Please choose an answer from {Yes/No}.\n",
674
+ "Input: april gold down 20 cents to settle at $1,116.10/oz\n",
675
+ "Answer: Yes\n",
676
+ "\n",
677
+ "==== Financial Named Entity Recognition ====\n",
678
+ "\n",
679
+ "Instruction: Please extract entities and their types from the input sentence, entity types should be chosen from {person/organization/location}.\n",
680
+ "Input: Subject to the terms and conditions of this Agreement, Bank agrees to lend to Borrower, from time to time prior to the Commitment Termination Date, equipment advances ( each an \" Equipment Advance \" and collectively the \" Equipment Advances \").\n",
681
+ "Answer: Bank is an organization, Borrower is a person.\n"
682
+ ]
683
+ }
684
+ ],
685
+ "source": [
686
+ "base_model = 'mpt'\n",
687
+ "peft_model = 'FinGPT/fingpt-mt_mpt-7b_lora' if FROM_REMOTE else 'finetuned_models/MT-mpt-linear_202309230221'\n",
688
+ "\n",
689
+ "model, tokenizer = load_model(base_model, peft_model, FROM_REMOTE)\n",
690
+ "test_demo(model, tokenizer)"
691
+ ]
692
+ }
693
+ ],
694
+ "metadata": {
695
+ "kernelspec": {
696
+ "display_name": "torch2",
697
+ "language": "python",
698
+ "name": "torch2"
699
+ },
700
+ "language_info": {
701
+ "codemirror_mode": {
702
+ "name": "ipython",
703
+ "version": 3
704
+ },
705
+ "file_extension": ".py",
706
+ "mimetype": "text/x-python",
707
+ "name": "python",
708
+ "nbconvert_exporter": "python",
709
+ "pygments_lexer": "ipython3",
710
+ "version": "3.9.12"
711
+ }
712
+ },
713
+ "nbformat": 4,
714
+ "nbformat_minor": 4
715
+ }
external/FinGPT/fingpt/FinGPT_Benchmark/readme.md ADDED
@@ -0,0 +1,169 @@
+ # FinGPT's Benchmark
+
+ [FinGPT: Instruction Tuning Benchmark for Open-Source Large Language Models in Financial Datasets
+ ](https://arxiv.org/abs/2310.04793)
+
+
+ The datasets we used and the multi-task financial LLMs are available at <https://huggingface.co/FinGPT>
+
+ ---
+
+ Before you start, make sure you have the correct versions of the key packages installed.
+ ```
+ transformers==4.32.0
+ peft==0.5.0
+ ```
+
+ [Weights & Biases](https://wandb.ai/site) is a good tool for tracking model training and inference. You need to register, get a free API key, and create a new project.
+
+ wandb produces some nice charts like the following:
+
+ <img width="440" alt="image" src="https://github.com/AI4Finance-Foundation/FinGPT/assets/31713746/04a08b3d-58e3-47aa-8b07-3ec6ff9dfea4">
+ <img width="440" alt="image" src="https://github.com/AI4Finance-Foundation/FinGPT/assets/31713746/f207a64b-622d-4a41-8e0f-1959a2d25450">
+ <img width="440" alt="image" src="https://github.com/AI4Finance-Foundation/FinGPT/assets/31713746/e7699c64-7c3c-4130-94b3-59688631120a">
+ <img width="440" alt="image" src="https://github.com/AI4Finance-Foundation/FinGPT/assets/31713746/65ca7853-3d33-4856-80e5-f03476efcc78">
+
+
+ ## Ready-to-use Demo
+
+ For users who want ready-to-use financial multi-task language models, please refer to `demo.ipynb`.
+ Following this notebook, you're able to test Llama2-7B, ChatGLM2-6B, MPT-7B, BLOOM-7B, Falcon-7B, or Qwen-7B on any of the following tasks:
+ - Financial Sentiment Analysis
+ - Headline Classification
+ - Named Entity Recognition
+ - Financial Relation Extraction
+
+ We suggest users follow the instruction template and task prompts that we used in our training process; a minimal sketch is shown below. Demos are shown in `demo.ipynb`. Due to the limited diversity of the financial tasks and datasets we used, models might not respond correctly to out-of-scope instructions. We'll delve deeper into generalization ability in future work.
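+
+ As a minimal sketch (assuming the `default` template defined in `utils.py`), a training-style prompt is assembled like this:
+ ```python
+ # The instruction template used during training (from utils.py).
+ template = 'Instruction: {instruction}\nInput: {input}\nAnswer: '
+
+ prompt = template.format(
+     instruction='What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}.',
+     input="Glaxo's ViiV Healthcare Signs China Manufacturing Deal With Desano",
+ )
+ # The model is expected to complete the text after "Answer: ".
+ ```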
+
+ ## Prepare Data & Base Models
+
+ For the base models we used, we recommend pre-downloading them and saving them to `base_models/`.
+
+ Refer to the `parse_model_name()` function in `utils.py` for the Hugging Face model we used for each LLM; a usage sketch follows. (We use base models rather than any instruction-tuned or chat version, except for ChatGLM2.)
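+
+ For example, a quick sanity check (paths taken from the `model_paths` table in `utils.py`):
+ ```python
+ from utils import parse_model_name
+
+ parse_model_name('llama2', from_remote=True)   # -> 'meta-llama/Llama-2-7b-hf'
+ parse_model_name('llama2', from_remote=False)  # -> 'base_models/Llama-2-7b-hf'
+ ```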
+
+ ---
+
+ For the datasets we used, download our processed instruction-tuning data from Hugging Face. Take the FinRED dataset as an example:
+ ```
+ import datasets
+
+ dataset = datasets.load_dataset('FinGPT/fingpt-finred')
+ # save to local disk space (recommended)
+ dataset.save_to_disk('data/fingpt-finred')
+ ```
+ Then `finred` becomes an available task option for training. Dataset names may also carry a replication factor, as shown in the sketch below.
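+
+ A minimal sketch of that parsing (based on `load_dataset()` in `utils.py`): a `*n` suffix, as in the `finred*3,ner*15` commands further down, simply loads the dataset n times to oversample it during multi-task tuning.
+ ```python
+ from utils import load_dataset
+
+ # 'sentiment-train' once plus 'finred' replicated three times -> four entries.
+ dataset_list = load_dataset('sentiment-train,finred*3', from_remote=False)
+ assert len(dataset_list) == 4
+ ```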
+
+ We use different datasets at different phases of our instruction tuning paradigm.
+ - Task-specific Instruction Tuning: `sentiment-train / finred-re / ner / headline`
+ - Multi-task Instruction Tuning: `sentiment-train & finred & ner & headline`
+ - Zero-shot Aimed Instruction Tuning: `finred-cls & ner-cls & headline-cls -> sentiment-cls (test)`
+
+ You may download the datasets according to your needs. We also provide processed datasets for ConvFinQA and FinEval, but they are not used in our final work.
+
+ ### Prepare Data from Scratch
+ To prepare training data from raw data, follow `data/prepare_data.ipynb`.
+
+ We don't include any source data from other open-source financial datasets in our repository, so if you want to do it from scratch, you need to find the corresponding source data and put it in `data/` before you start.
+
+ ---
+
+ ## Instruction Tuning
+
+ `train.sh` contains examples of instruction tuning with this repo.
+ If you don't have the training data & base models on your local disk, additionally pass `--from_remote true`.
+
+ ### Task-specific Instruction Tuning
+ ```
+ #chatglm2
+ deepspeed train_lora.py \
+ --run_name headline-chatglm2-linear \
+ --base_model chatglm2 \
+ --dataset headline \
+ --max_length 512 \
+ --batch_size 4 \
+ --learning_rate 1e-4 \
+ --num_epochs 8
+ ```
+
+ Please be aware that "localhost:2" refers to a particular GPU device.
+
+ ```
+ #llama2-13b
+ deepspeed -i "localhost:2" train_lora.py \
+ --run_name sentiment-llama2-13b-8epoch-16batch \
+ --base_model llama2-13b-nr \
+ --dataset sentiment-train \
+ --max_length 512 \
+ --batch_size 16 \
+ --learning_rate 1e-5 \
+ --num_epochs 8 \
+ --from_remote True \
+ >train.log 2>&1 &
+ ```
+
+ Use
+ ```
+ tail -f train.log
+ ```
+ to follow the training log.
+
+ ### Multi-task Instruction Tuning
+ ```
+ deepspeed train_lora.py \
+ --run_name MT-falcon-linear \
+ --base_model falcon \
+ --dataset sentiment-train,headline,finred*3,ner*15 \
+ --max_length 512 \
+ --batch_size 4 \
+ --learning_rate 1e-4 \
+ --num_epochs 4
+ ```
+ ### Zero-shot Aimed Instruction Tuning
+ ```
+ deepspeed train_lora.py \
+ --run_name GRCLS-sentiment-falcon-linear-small \
+ --base_model falcon \
+ --test_dataset sentiment-cls-instruct \
+ --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
+ --max_length 512 \
+ --batch_size 4 \
+ --learning_rate 1e-4 \
+ --num_epochs 1 \
+ --log_interval 10 \
+ --warmup_ratio 0 \
+ --scheduler linear \
+ --evaluation_strategy steps \
+ --eval_steps 100 \
+ --ds_config config_hf.json
+ ```
+
+ ---
+
+ ## Evaluation for Financial Tasks
+
+ Refer to `benchmarks/evaluate.sh` for the evaluation script covering all financial tasks.
+ You can evaluate your trained model on multiple tasks together. For example:
+ ```
+ python benchmarks.py \
+ --dataset fpb,fiqa,tfns,nwgi,headline,ner,re \
+ --base_model llama2 \
+ --peft_model ../finetuned_models/MT-llama2-linear_202309241345 \
+ --batch_size 8 \
+ --max_length 512
+ ```
+
+ ```
+ #llama2-13b sentiment analysis
+ CUDA_VISIBLE_DEVICES=1 python benchmarks.py \
+ --dataset fpb,fiqa,tfns,nwgi \
+ --base_model llama2-13b-nr \
+ --peft_model ../finetuned_models/sentiment-llama2-13b-8epoch-16batch_202310271908 \
+ --batch_size 8 \
+ --max_length 512 \
+ --from_remote True
+ ```
+
+ For zero-shot evaluation on sentiment analysis, we use multiple prompts and evaluate each of them.
+ The task indicators are `fiqa_mlt` and `fpb_mlt`.
+
+
external/FinGPT/fingpt/FinGPT_Benchmark/train.sh ADDED
@@ -0,0 +1,547 @@
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
+ export NCCL_IGNORE_DISABLED_P2P=1
+ export TRANSFORMERS_NO_ADVISORY_WARNINGS=1
+ export TOKENIZERS_PARALLELISM=0
+
+
+
+ #---- Generalization ----
+
+ # deepspeed train_lora.py \
+ # --run_name GRCLS-sentiment-chatglm2-linear-1e-4lr \
+ # --base_model chatglm2 \
+ # --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
+ # --test_dataset sentiment-cls-instruct \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 1 \
+ # --log_interval 10 \
+ # --warmup_ratio 0.03 \
+ # --scheduler linear \
+ # --evaluation_strategy steps \
+ # --ds_config config_hf.json
+
+ # deepspeed train_lora.py \
+ # --run_name GRCLS-sentiment-llama2-linear-small \
+ # --base_model llama2 \
+ # --test_dataset sentiment-cls-instruct \
+ # --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 1 \
+ # --log_interval 10 \
+ # --warmup_ratio 0 \
+ # --scheduler linear \
+ # --evaluation_strategy steps \
+ # --eval_steps 100 \
+ # --ds_config config_hf.json
+
+ # deepspeed train_lora.py \
+ # --run_name GRCLS-sentiment-falcon-linear-small \
+ # --base_model falcon \
+ # --test_dataset sentiment-cls-instruct \
+ # --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 1 \
+ # --log_interval 10 \
+ # --warmup_ratio 0 \
+ # --scheduler linear \
+ # --evaluation_strategy steps \
+ # --eval_steps 100 \
+ # --ds_config config_hf.json
+
+ # deepspeed train_lora.py \
+ # --run_name GRCLS-sentiment-qwen-linear-small \
+ # --base_model qwen \
+ # --test_dataset sentiment-cls-instruct \
+ # --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 1 \
+ # --log_interval 10 \
+ # --warmup_ratio 0 \
+ # --scheduler linear \
+ # --evaluation_strategy steps \
+ # --eval_steps 100 \
+ # --ds_config config_hf.json
+
+ # deepspeed train_lora.py \
+ # --run_name GRCLS-sentiment-bloom-linear-small \
+ # --base_model bloom \
+ # --test_dataset sentiment-cls-instruct \
+ # --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 1 \
+ # --log_interval 10 \
+ # --warmup_ratio 0 \
+ # --scheduler linear \
+ # --evaluation_strategy steps \
+ # --eval_steps 100 \
+ # --ds_config config_hf.json
+
+ # deepspeed train_lora.py \
+ # --run_name GRCLS-sentiment-mpt-linear-small \
+ # --base_model mpt \
+ # --dataset headline-cls-instruct,finred-cls-instruct*2,ner-cls-instruct*7 \
+ # --test_dataset sentiment-cls-instruct \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 1 \
+ # --log_interval 10 \
+ # --warmup_ratio 0.03 \
+ # --scheduler linear \
+ # --evaluation_strategy steps \
+ # --eval_steps 100 \
+ # --ds_config config_hf.json
+
+
+ #---- Multi-Task ----
+
+ # deepspeed train_lora.py \
+ # --run_name MT-chatglm2-linear \
+ # --base_model chatglm2 \
+ # --dataset sentiment-train,headline,finred*3,ner*15 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name MT-falcon-linear \
+ # --base_model falcon \
+ # --dataset sentiment-train,headline,finred*3,ner*15 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name MT-qwen-linear \
+ # --base_model qwen \
+ # --dataset sentiment-train,headline,finred*3,ner*15 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name MT-mpt-linear \
+ # --base_model mpt \
+ # --dataset sentiment-train,headline,finred*3,ner*15 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name MT-bloom-linear \
+ # --base_model bloom \
+ # --dataset sentiment-train,headline,finred*3,ner*15 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name MT-llama2-linear \
+ # --base_model llama2 \
+ # --dataset sentiment-train,headline,finred*3,ner*15 \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4 \
+ # --log_interval 10
+
+
+ #---- FinEval ----
+
+ # deepspeed train_lora.py \
+ # --run_name fineval-internlm-linear \
+ # --base_model internlm \
+ # --dataset data/fingpt-fineval \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 50 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name fineval-llama2-linear \
+ # --base_model llama2 \
+ # --dataset data/fingpt-fineval \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 50 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name fineval-chatglm2-linear \
+ # --base_model chatglm2 \
+ # --dataset data/fingpt-fineval \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 50 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name fineval-falcon-linear \
+ # --base_model falcon \
+ # --dataset data/fingpt-fineval \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 50 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name fineval-qwen-linear \
+ # --base_model qwen \
+ # --dataset data/fingpt-fineval \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 50 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name fineval-mpt-linear \
+ # --base_model mpt \
+ # --dataset data/fingpt-fineval \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 50 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name fineval-bloom-linear \
+ # --base_model bloom \
+ # --dataset data/fingpt-fineval \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 50 \
+ # --log_interval 10
+
+
+ #---- ConvFinQA ----
+
+ # deepspeed train_lora.py \
+ # --run_name convfinqa-llama2-linear \
+ # --base_model llama2 \
+ # --ds_config config_hf.json \
+ # --dataset data/fingpt-convfinqa \
+ # --max_length 2048 \
+ # --batch_size 1 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name convfinqa-chatglm2-linear \
+ # --base_model chatglm2 \
+ # --dataset data/fingpt-convfinqa \
+ # --max_length 2048 \
+ # --batch_size 1 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name convfinqa-falcon-linear \
+ # --base_model falcon \
+ # --dataset data/fingpt-convfinqa \
+ # --max_length 2048 \
+ # --batch_size 1 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name convfinqa-qwen-linear \
+ # --base_model qwen \
+ # --dataset data/fingpt-convfinqa \
+ # --max_length 2048 \
+ # --batch_size 1 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name convfinqa-mpt-linear \
+ # --base_model mpt \
+ # --dataset data/fingpt-convfinqa \
+ # --max_length 2048 \
+ # --batch_size 1 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+ # deepspeed train_lora.py \
+ # --run_name convfinqa-bloom-linear \
+ # --base_model bloom \
+ # --dataset data/fingpt-convfinqa \
+ # --max_length 2048 \
+ # --batch_size 1 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 4
+
+
+ #---- NER ----
+
+ # deepspeed train_lora.py \
+ # --run_name ner-llama2-linear \
+ # --base_model llama2 \
+ # --dataset data/fingpt-ner \
+ # --ds_config config_hf.json \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 100 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name ner-chatglm2-linear \
+ # --base_model chatglm2 \
+ # --dataset data/fingpt-ner \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 100 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name ner-falcon-linear \
+ # --base_model falcon \
+ # --dataset data/fingpt-ner \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 100 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name ner-qwen-linear \
+ # --base_model qwen \
+ # --dataset data/fingpt-ner \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 100 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name ner-mpt-linear \
+ # --base_model mpt \
+ # --dataset data/fingpt-ner \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 100 \
+ # --log_interval 10
+
+ # deepspeed train_lora.py \
+ # --run_name ner-bloom-linear \
+ # --base_model bloom \
+ # --dataset data/fingpt-ner \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 100 \
+ # --log_interval 10
+
+
+ #---- Headline (IE) ----
+
+ # deepspeed train_lora.py \
+ # --run_name headline-internlm-linear \
+ # --base_model internlm \
+ # --dataset data/fingpt-headline \
+ # --ds_config config_hf.json \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name headline-llama2-linear \
+ # --base_model llama2 \
+ # --dataset data/fingpt-headline \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name headline-chatglm2-linear \
+ # --base_model chatglm2 \
+ # --dataset data/fingpt-headline \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name headline-falcon-linear \
+ # --base_model falcon \
+ # --dataset data/fingpt-headline \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name headline-qwen-linear \
+ # --base_model qwen \
+ # --dataset data/fingpt-headline \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name headline-mpt-linear \
+ # --base_model mpt \
+ # --dataset data/fingpt-headline \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name headline-bloom-linear \
+ # --base_model bloom \
+ # --dataset data/fingpt-headline \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ #---- Sentiment Analysis ----
+
+ # deepspeed train_lora.py \
+ # --run_name sentiment-internlm-linear \
+ # --base_model internlm \
+ # --dataset data/fingpt-sentiment-train \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name sentiment-llama2-linear \
+ # --base_model llama2 \
+ # --dataset data/fingpt-sentiment-train \
+ # --ds_config config_hf.json \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name sentiment-chatglm2-linear \
+ # --base_model chatglm2 \
+ # --dataset data/fingpt-sentiment-train \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name sentiment-falcon-linear \
+ # --base_model falcon \
+ # --dataset data/fingpt-sentiment-train \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name sentiment-qwen-linear \
+ # --base_model qwen \
+ # --dataset data/fingpt-sentiment-train \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name sentiment-mpt-linear \
+ # --base_model mpt \
+ # --dataset data/fingpt-sentiment-train \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name sentiment-bloom-linear \
+ # --base_model bloom \
+ # --dataset data/fingpt-sentiment-train \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+
+ #---- Relation Extraction ----
+
+ # deepspeed train_lora.py \
+ # --run_name finred-llama2-linear \
+ # --base_model llama2 \
+ # --dataset data/fingpt-finred-re \
+ # --ds_config config_hf.json \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name finred-chatglm2-linear \
+ # --base_model chatglm2 \
+ # --dataset data/fingpt-finred-re \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name finred-falcon-linear \
+ # --base_model falcon \
+ # --dataset data/fingpt-finred-re \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name finred-qwen-linear \
+ # --base_model qwen \
+ # --dataset data/fingpt-finred-re \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name finred-mpt-linear \
+ # --base_model mpt \
+ # --dataset data/fingpt-finred-re \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
+
+ # deepspeed train_lora.py \
+ # --run_name finred-bloom-linear \
+ # --base_model bloom \
+ # --dataset data/fingpt-finred-re \
+ # --max_length 512 \
+ # --batch_size 4 \
+ # --learning_rate 1e-4 \
+ # --num_epochs 8
external/FinGPT/fingpt/FinGPT_Benchmark/train_lora.py ADDED
@@ -0,0 +1,198 @@
+ import os
+ import sys
+ import argparse
+ from datetime import datetime
+ from functools import partial
+ import datasets
+ import torch
+ from torch.utils.tensorboard import SummaryWriter
+ import wandb
+ from transformers import (
+     AutoTokenizer,
+     AutoModel,
+     AutoModelForCausalLM,
+     TrainingArguments,
+     Trainer,
+     DataCollatorForSeq2Seq
+ )
+ from transformers.trainer import TRAINING_ARGS_NAME
+ from transformers.integrations import TensorBoardCallback
+ # Importing LoRA specific modules
+ from peft import (
+     TaskType,
+     LoraConfig,
+     get_peft_model,
+     get_peft_model_state_dict,
+     prepare_model_for_int8_training,
+     set_peft_model_state_dict
+ )
+ from utils import *
+
+
+ # Replace with your own api_key and project name (never commit a real key)
+ os.environ['WANDB_API_KEY'] = 'your_wandb_api_key'
+ os.environ['WANDB_PROJECT'] = 'fingpt-benchmark'
+
+
+ def main(args):
+     """
+     Main function to execute the training script.
+
+     :param args: Command line arguments
+     """
+
+     # Parse the model name and determine if it should be fetched from a remote source
+     model_name = parse_model_name(args.base_model, args.from_remote)
+
+     # Load the pre-trained causal language model
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         # load_in_8bit=True,
+         # device_map="auto",
+         trust_remote_code=True
+     )
+     # Print model architecture for the first process in distributed training
+     if args.local_rank == 0:
+         print(model)
+
+     # Load tokenizer associated with the pre-trained model
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+     # Apply model specific tokenization settings
+     if args.base_model != 'mpt':
+         tokenizer.padding_side = "left"
+     if args.base_model == 'qwen':
+         tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids('<|endoftext|>')
+         tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids('<|extra_0|>')
+     # Ensure padding token is set correctly; adding '[PAD]' grows the vocabulary,
+     # so the embedding matrix must be resized to match.
+     if not tokenizer.pad_token or tokenizer.pad_token_id == tokenizer.eos_token_id:
+         tokenizer.add_special_tokens({'pad_token': '[PAD]'})
+         model.resize_token_embeddings(len(tokenizer))
+
+     # Load training and testing datasets
+     dataset_list = load_dataset(args.dataset, args.from_remote)
+     dataset_train = datasets.concatenate_datasets([d['train'] for d in dataset_list]).shuffle(seed=42)
+
+     # If no dedicated test dataset is given, fall back to the test splits of the
+     # training datasets (load_dataset in utils.py guarantees a 'test' split).
+     if args.test_dataset:
+         test_list = load_dataset(args.test_dataset, args.from_remote)
+     else:
+         test_list = dataset_list
+     dataset_test = datasets.concatenate_datasets([d['test'] for d in test_list])
+
+     dataset = datasets.DatasetDict({'train': dataset_train, 'test': dataset_test})
+     # Display first sample from the training dataset
+     print(dataset['train'][0])
+     # Filter out samples that exceed the maximum token length and remove unused columns
+     dataset = dataset.map(partial(tokenize, args, tokenizer))
+     print('original dataset length: ', len(dataset['train']))
+     dataset = dataset.filter(lambda x: not x['exceed_max_length'])
+     print('filtered dataset length: ', len(dataset['train']))
+     dataset = dataset.remove_columns(['instruction', 'input', 'output', 'exceed_max_length'])
+
+     print(dataset['train'][0])
+
+     # Create a timestamp for model saving
+     current_time = datetime.now()
+     formatted_time = current_time.strftime('%Y%m%d%H%M')
+
+     # Set up training arguments
+     training_args = TrainingArguments(
+         output_dir=f'finetuned_models/{args.run_name}_{formatted_time}',  # where checkpoints are saved
+         logging_steps=args.log_interval,
+         num_train_epochs=args.num_epochs,
+         per_device_train_batch_size=args.batch_size,
+         per_device_eval_batch_size=args.batch_size,
+         gradient_accumulation_steps=args.gradient_steps,
+         dataloader_num_workers=args.num_workers,
+         learning_rate=args.learning_rate,
+         warmup_ratio=args.warmup_ratio,
+         lr_scheduler_type=args.scheduler,
+         save_steps=args.eval_steps,
+         eval_steps=args.eval_steps,
+         fp16=True,
+         # fp16_full_eval=True,
+         deepspeed=args.ds_config,
+         evaluation_strategy=args.evaluation_strategy,
+         load_best_model_at_end=args.load_best_model,
+         remove_unused_columns=False,
+         report_to='wandb',
+         run_name=args.run_name
+     )
+     if args.base_model != 'mpt':
+         model.gradient_checkpointing_enable()
+         model.enable_input_require_grads()
+     model.is_parallelizable = True
+     model.model_parallel = True
+     # Caching is incompatible with gradient checkpointing
+     model.config.use_cache = False
+     # model = prepare_model_for_int8_training(model)
+
+     # setup peft for lora
+     peft_config = LoraConfig(
+         task_type=TaskType.CAUSAL_LM,
+         inference_mode=False,
+         r=8,
+         lora_alpha=32,
+         lora_dropout=0.1,
+         target_modules=lora_module_dict[args.base_model],
+         bias='none',
+     )
+     model = get_peft_model(model, peft_config)
+
+     # Initialize TensorBoard for logging
+     writer = SummaryWriter()
+
+     # Initialize the trainer
+     trainer = Trainer(
+         model=model,
+         args=training_args,
+         train_dataset=dataset["train"],
+         eval_dataset=dataset["test"],
+         data_collator=DataCollatorForSeq2Seq(
+             tokenizer, padding=True,
+             return_tensors="pt"
+         ),
+         callbacks=[TensorBoardCallback(writer)],
+     )
+
+     # if torch.__version__ >= "2" and sys.platform != "win32":
+     #     model = torch.compile(model)
+
+     # Clear CUDA cache and start training
+     torch.cuda.empty_cache()
+     trainer.train()
+     writer.close()
+
+     # Save the fine-tuned model
+     model.save_pretrained(training_args.output_dir)
+
+
+ def str2bool(v):
+     # argparse's type=bool treats any non-empty string (including 'false') as True,
+     # so boolean flags need an explicit string-to-bool conversion.
+     return str(v).lower() in ('true', '1', 'yes')
+
+
+ if __name__ == "__main__":
+     # Argument parser for command line arguments
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--local_rank", default=0, type=int)
+     parser.add_argument("--run_name", default='local-test', type=str)
+     parser.add_argument("--dataset", required=True, type=str)
+     parser.add_argument("--test_dataset", type=str)
+     parser.add_argument("--base_model", required=True, type=str, choices=['chatglm2', 'llama2', 'llama2-13b', 'llama2-13b-nr', 'baichuan', 'falcon', 'internlm', 'qwen', 'mpt', 'bloom'])
+     parser.add_argument("--max_length", default=512, type=int)
+     parser.add_argument("--batch_size", default=4, type=int, help="The train batch size per device")
+     parser.add_argument("--learning_rate", default=1e-4, type=float, help="The learning rate")
+     parser.add_argument("--num_epochs", default=8, type=float, help="The training epochs")
+     parser.add_argument("--gradient_steps", default=8, type=int, help="The gradient accumulation steps")
+     parser.add_argument("--num_workers", default=8, type=int, help="dataloader workers")
+     parser.add_argument("--log_interval", default=20, type=int)
+     parser.add_argument("--warmup_ratio", default=0.05, type=float)
+     parser.add_argument("--ds_config", default='./config_new.json', type=str)
+     parser.add_argument("--scheduler", default='linear', type=str)
+     parser.add_argument("--instruct_template", default='default')
+     parser.add_argument("--evaluation_strategy", default='steps', type=str)
+     parser.add_argument("--load_best_model", default=False, type=str2bool)
+     parser.add_argument("--eval_steps", default=0.1, type=float)
+     parser.add_argument("--from_remote", default=False, type=str2bool)
+     args = parser.parse_args()
+
+     # Login to Weights and Biases
+     wandb.login()
+
+     # Run the main function
+     main(args)
external/FinGPT/fingpt/FinGPT_Benchmark/utils.py ADDED
@@ -0,0 +1,216 @@
+ import os
+ import datasets
+
+ # A dictionary to store various prompt templates.
+ template_dict = {
+     'default': 'Instruction: {instruction}\nInput: {input}\nAnswer: '
+ }
+
+ # A dictionary to store the LoRA module mapping for different models.
+ lora_module_dict = {
+     'chatglm2': ['query_key_value'],
+     'falcon': ['query_key_value'],
+     'bloom': ['query_key_value'],
+     'internlm': ['q_proj', 'k_proj', 'v_proj'],
+     'llama2': ['q_proj', 'k_proj', 'v_proj'],
+     'llama2-13b': ['q_proj', 'k_proj', 'v_proj'],
+     'llama2-13b-nr': ['q_proj', 'k_proj', 'v_proj'],
+     'qwen': ["c_attn"],
+     'mpt': ['Wqkv'],
+     'baichuan': ['q_proj', 'k_proj', 'v_proj'],
+ }
+
+
+ def get_prompt(template, instruction, input_text):
+     """
+     Generates a prompt based on a predefined template, instruction, and input.
+
+     Args:
+         template (str): The key to select the prompt template from the predefined dictionary.
+         instruction (str): The instruction text to be included in the prompt.
+         input_text (str): The input text to be included in the prompt.
+
+     Returns:
+         str: The generated prompt.
+
+     Raises:
+         KeyError: If the provided template key is not found in the template dictionary.
+     """
+     if not instruction:
+         return input_text
+
+     if template not in template_dict:
+         raise KeyError(f"Template '{template}' not found. Available templates: {', '.join(template_dict.keys())}")
+
+     return template_dict[template].format(instruction=instruction, input=input_text)
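+
+ # Example:
+ #   get_prompt('default', 'What is the sentiment?', 'Stocks rallied.')
+ #   -> 'Instruction: What is the sentiment?\nInput: Stocks rallied.\nAnswer: '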
+
+
+ def test_mapping(args, feature):
+     """
+     Generate a mapping for testing purposes by constructing a prompt based on given instructions and input.
+
+     Args:
+         args (Namespace): A namespace object that holds various configurations, including the instruction template.
+         feature (dict): A dictionary containing 'instruction' and 'input' fields used to construct the prompt.
+
+     Returns:
+         dict: A dictionary containing the generated prompt.
+
+     Raises:
+         ValueError: If 'instruction' or 'input' are not provided in the feature dictionary.
+     """
+     # Ensure 'instruction' and 'input' are present in the feature dictionary.
+     if 'instruction' not in feature or 'input' not in feature:
+         raise ValueError("Both 'instruction' and 'input' need to be provided in the feature dictionary.")
+
+     # Construct the prompt using the provided instruction and input.
+     prompt = get_prompt(
+         args.instruct_template,
+         feature['instruction'],
+         feature['input']
+     )
+
+     return {
+         "prompt": prompt,
+     }
+
+ def tokenize(args, tokenizer, feature):
+     """
+     Tokenizes the input prompt and target/output for model training or evaluation.
+
+     Args:
+         args (Namespace): A namespace object containing various settings and configurations.
+         tokenizer (Tokenizer): A tokenizer object used to convert text into tokens.
+         feature (dict): A dictionary containing 'input', 'instruction', and 'output' fields.
+
+     Returns:
+         dict: A dictionary containing tokenized 'input_ids', 'labels', and a flag 'exceed_max_length'.
+     """
+     # Generate the prompt.
+     prompt = get_prompt(
+         args.instruct_template,
+         feature['instruction'],
+         feature['input']
+     )
+     # Tokenize the prompt.
+     prompt_ids = tokenizer(
+         prompt,
+         padding=False,
+         max_length=args.max_length,
+         truncation=True
+     )['input_ids']
+
+     # Tokenize the target/output.
+     target_ids = tokenizer(
+         feature['output'].strip(),
+         padding=False,
+         max_length=args.max_length,
+         truncation=True,
+         add_special_tokens=False
+     )['input_ids']
+
+     # Combine tokenized prompt and target output.
+     input_ids = prompt_ids + target_ids
+
+     # Check if the combined length exceeds the maximum allowed length.
+     exceed_max_length = len(input_ids) >= args.max_length
+
+     # Add an end-of-sequence (EOS) token if it's not already present
+     # and if the sequence length is within the limit.
+     if input_ids[-1] != tokenizer.eos_token_id and not exceed_max_length:
+         input_ids.append(tokenizer.eos_token_id)
+
+     # Create label IDs for training.
+     # The labels should start from where the prompt ends, and be padded for the prompt portion.
+     label_ids = [tokenizer.pad_token_id] * len(prompt_ids) + input_ids[len(prompt_ids):]
+
+     return {
+         "input_ids": input_ids,
+         "labels": label_ids,
+         "exceed_max_length": exceed_max_length
+     }
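+
+ # Illustration with hypothetical token ids: for prompt_ids = [p1, p2, p3] and
+ # target_ids = [t1, t2], tokenize() returns input_ids = [p1, p2, p3, t1, t2, eos]
+ # and labels = [pad, pad, pad, t1, t2, eos], so the loss is computed only on the
+ # answer tokens, never on the prompt.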
+
+
+ def parse_model_name(name, from_remote=False):
+     """
+     Parse the model name and return the appropriate path based on whether
+     the model is to be fetched from a remote source or from a local source.
+
+     Args:
+     - name (str): Name of the model.
+     - from_remote (bool): If True, return the remote path, else return the local path.
+
+     Returns:
+     - str: The appropriate path for the given model name.
+     """
+     model_paths = {
+         'chatglm2': ('THUDM/chatglm2-6b', 'base_models/chatglm2-6b'),
+         'llama2': ('meta-llama/Llama-2-7b-hf', 'base_models/Llama-2-7b-hf'),
+         'llama2-13b': ('meta-llama/Llama-2-13b-hf', 'base_models/Llama-2-13b-hf'),
+         'llama2-13b-nr': ('NousResearch/Llama-2-13b-hf', 'base_models/Llama-2-13b-hf'),
+         'falcon': ('tiiuae/falcon-7b', 'base_models/falcon-7b'),
+         'internlm': ('internlm/internlm-7b', 'base_models/internlm-7b'),
+         'qwen': ('Qwen/Qwen-7B', 'base_models/Qwen-7B'),
+         'baichuan': ('baichuan-inc/Baichuan2-7B-Base', 'base_models/Baichuan2-7B-Base'),
+         'mpt': ('cekal/mpt-7b-peft-compatible', 'base_models/mpt-7b-peft-compatible'),
+         'bloom': ('bigscience/bloom-7b1', 'base_models/bloom-7b1')
+     }
+
+     if name in model_paths:
+         return model_paths[name][0] if from_remote else model_paths[name][1]
+     else:
+         valid_model_names = ', '.join(model_paths.keys())
+         raise ValueError(f"Undefined base model '{name}'. Valid model names are: {valid_model_names}")
+
+
+ def load_dataset(names, from_remote=False):
+     """
+     Load one or multiple datasets based on the provided names and source location.
+
+     Args:
+         names (str): A comma-separated list of dataset names. Each name can be followed by '*n' to indicate replication.
+         from_remote (bool): If True, load the dataset from Hugging Face's model hub. Otherwise, load it from a local disk.
+
+     Returns:
+         List[Dataset]: A list of loaded datasets. Each dataset is possibly replicated based on the input names.
+     """
+     # Split the dataset names by commas for handling multiple datasets
+     dataset_names = names.split(',')
+     dataset_list = []
+
+     for name in dataset_names:
+         # Initialize replication factor to 1
+         replication_factor = 1
+         dataset_name = name
+
+         # Check if the dataset name includes a replication factor
+         if '*' in name:
+             dataset_name, replication_factor = name.split('*')
+             replication_factor = int(replication_factor)
+             if replication_factor < 1:
+                 raise ValueError("Replication factor must be a positive integer.")
+
+         # Construct the correct dataset path or name based on the source location
+         dataset_path_or_name = ('FinGPT/fingpt-' if from_remote else 'data/fingpt-') + dataset_name
+         if not os.path.exists(dataset_path_or_name) and not from_remote:
+             raise FileNotFoundError(f"The dataset path {dataset_path_or_name} does not exist.")
+
+         # Load the dataset
+         try:
+             tmp_dataset = datasets.load_dataset(dataset_path_or_name) if from_remote else datasets.load_from_disk(
+                 dataset_path_or_name)
+         except Exception as e:
+             raise RuntimeError(f"Failed to load the dataset: {str(e)}")
+
+         # Check for 'test' split and create it from 'train' if necessary
+         if 'test' not in tmp_dataset:
+             if 'train' in tmp_dataset:
+                 tmp_dataset = tmp_dataset['train']
+                 tmp_dataset = tmp_dataset.train_test_split(test_size=0.2, shuffle=True, seed=42)
+             else:
+                 raise ValueError("The dataset must contain a 'train' or 'test' split.")
+
+         # Append the possibly replicated dataset to the list
+         dataset_list.extend([tmp_dataset] * replication_factor)
+
+     return dataset_list