Update README.md
Browse files
README.md
CHANGED
@@ -48,6 +48,70 @@ This project uses a fine-tuned version of the BART model from Facebook for summa
|
|
48 |
- **Original Model:** [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn)
|
49 |
- **Fine-Tuned Model:** [julsCadenas/summarize-reddit](https://huggingface.co/julsCadenas/summarize-reddit)
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
## **Model Evaluation**
|
52 |
|
53 |
### **ROUGE-1 SCORES:**
|
|
|
48 |
- **Original Model:** [facebook/bart-large-cnn](https://huggingface.co/facebook/bart-large-cnn)
|
49 |
- **Fine-Tuned Model:** [julsCadenas/summarize-reddit](https://huggingface.co/julsCadenas/summarize-reddit)
|
50 |
|
51 |
+
## **Usage**
|
52 |
+
|
53 |
+
```python
|
54 |
+
from transformers import pipeline
|
55 |
+
|
56 |
+
class Summarize:
    """Summarize a Reddit post and its comments with a summarization pipeline.

    The pipeline is loaded from the ``julsCadenas/summarize-reddit``
    checkpoint and stored on ``self.summarizer``.
    """

    def __init__(self):
        """Load the summarization pipeline (model and tokenizer)."""
        self.summarizer = pipeline(
            "summarization",
            model="julsCadenas/summarize-reddit",
            tokenizer="julsCadenas/summarize-reddit",
        )

    def summarize(self, text, prompt):
        """Return the summary of ``text``, using ``prompt`` as an instruction prefix.

        Args:
            text: The text to summarize.
            prompt: Instruction placed in front of the text as ``"{prompt}: {text}"``.

        Returns:
            The generated summary string, or ``""`` when ``text`` is empty or
            only whitespace (there is nothing to summarize, and feeding the
            bare prompt to the model would only produce invented content).
        """
        if not text or not text.strip():
            return ""

        inputs = f"{prompt}: {text}"
        input_tokens = self.summarizer.tokenizer.encode(inputs, truncation=False)
        input_len = len(input_tokens)

        # Length bounds scale with the input size; change depending on your use case.
        max_length = min(input_len * 2, 1024)
        # Cap the floor at max_length: for short inputs (< 16 tokens) the
        # 32-token floor would otherwise exceed max_length, which is an
        # invalid generation configuration.
        min_length = min(max(32, input_len // 4), max_length)

        summary = self.summarizer(
            inputs,
            max_length=max_length,
            min_length=min_length,
            length_penalty=2.0,
            num_beams=4,
            # Ask the pipeline to truncate over-long inputs instead of passing
            # them to the model at full length.
            truncation=True,
        )
        return summary[0]["summary_text"]

    def process_data(self, response, prompt):
        """Summarize the post body and the comments found in ``response``.

        Args:
            response: A two-element sequence. ``response[0]['data']['children'][0]['data']``
                holds the post (its ``selftext`` is used, defaulting to ``''``)
                and ``response[1]['data']['children']`` holds the comment
                entries. Presumably this is the JSON of a Reddit thread;
                verify against the caller.
            prompt: Instruction forwarded to :meth:`summarize` for both texts.

        Returns:
            A dict with the keys ``"post_summary"`` and ``"comments_summary"``.
        """
        post_content = response[0]["data"]["children"][0]["data"].get("selftext", "")

        # Entries without a 'body' key are skipped.
        comments = [
            comment["data"]["body"]
            for comment in response[1]["data"]["children"]
            if "body" in comment["data"]
        ]
        comments_all = " ".join(comments)

        return {
            "post_summary": self.summarize(post_content, prompt),
            "comments_summary": self.summarize(comments_all, prompt),
        }
|
94 |
+
```
|
95 |
+
|
96 |
+
- You can also use a script to format the JSON output:
|
97 |
+
```python
|
98 |
+
def fix_json(jsonfile, path):
    """Decode the nested JSON strings in a summary result and save it to ``path``.

    The ``post_summary`` and ``comments_summary`` fields are expected to be
    JSON-encoded strings inside the outer JSON document; each one is decoded
    so the saved file contains real nested JSON instead of escaped strings.

    Args:
        jsonfile: The JSON document as a string (despite the name, this is
            the text itself, not a file path or file object).
        path: Destination file path; the file is overwritten.

    Returns:
        The repaired data as a Python object (also pretty-printed to stdout
        and written to ``path`` with 4-space indentation).

    Raises:
        json.JSONDecodeError: If the document, or a summary field that is a
            string, is not valid JSON.
        KeyError: If ``post_summary`` or ``comments_summary`` is missing.
    """
    import json  # local import so the snippet also runs when copied on its own

    fixed_json = json.loads(jsonfile)

    for key in ("post_summary", "comments_summary"):
        value = fixed_json[key]
        # Only strings need a second decoding pass; a field that is already
        # decoded would make json.loads raise TypeError.
        if isinstance(value, str):
            fixed_json[key] = json.loads(value)

    print(json.dumps(fixed_json, indent=4))

    with open(path, "w", encoding="utf-8") as file:
        json.dump(fixed_json, file, indent=4)

    return fixed_json
|
113 |
+
```
|
114 |
+
|
115 |
## **Model Evaluation**
|
116 |
|
117 |
### **ROUGE-1 SCORES:**
|