Spaces:

cheesexuebao
/

murphy

Sleeping

App Files Community

cheesexuebao committed on Dec 23, 2023

Commit

cf5b520

1 Parent(s): 442a3b7

fix: 🐛 多标签变成多分类

Browse files

Files changed (18) hide show

Prediction.py +6 -8
app.py +34 -37
assets/csv_examples.csv +0 -30
assets/example.csv +15 -0
assets/examples.txt +14 -14
convert.py +4 -2
models/All_Data/config.json +4 -2
models/All_Data/pytorch_model.bin +2 -2
models/Facebook/config.json +0 -37
models/Facebook/pytorch_model.bin +0 -3
models/Facebook/vocab.txt +0 -0
models/Kickstarter/config.json +0 -37
models/Kickstarter/pytorch_model.bin +0 -3
models/Kickstarter/vocab.txt +0 -0
models/Twitter/config.json +0 -37
models/Twitter/pytorch_model.bin +0 -3
models/Twitter/vocab.txt +0 -0
tmp.py +5 -0

Prediction.py CHANGED Viewed

@@ -8,7 +8,7 @@ import glob
 RANDOM_SEED = 42
 pd.RANDOM_SEED = 42
-LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone"]
 @torch.no_grad()
@@ -34,16 +34,14 @@ def predict_csv(data, text_col, tokenizer, model, device, text_bs=16, max_token_
           encoding["attention_mask"].to(device),
           return_dict=True
       ).logits
-      prediction = torch.sigmoid(logits)
       predictions.append(prediction.detach().cpu())
     final_pred = torch.cat(predictions, dim=0)
     y_inten = final_pred.numpy().T
-    data[LABEL_COLUMNS[0]] = y_inten[0].tolist()
-    data[LABEL_COLUMNS[1]] = y_inten[1].tolist()
-    data[LABEL_COLUMNS[2]] = y_inten[2].tolist()
-    data[LABEL_COLUMNS[3]] = y_inten[3].tolist()
     return data
 @torch.no_grad()
@@ -63,7 +61,7 @@ def predict_single(sentence, tokenizer, model, device, max_token_len=128):
         encoding["attention_mask"].to(device),
         return_dict=True
     ).logits
-    prediction = torch.sigmoid(logits)
     y_inten = prediction.flatten().cpu().numpy().T.tolist()
     return y_inten
@@ -84,7 +82,7 @@ def model_factory(local_path, device):
 if __name__ == "__main__":
-  Data = pd.read_csv("Kickstarter_sentence_level_5000.csv")
   Data = Data[:20]
   device = torch.device('cpu')

 RANDOM_SEED = 42
 pd.RANDOM_SEED = 42
+LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone", "None"]
 @torch.no_grad()
           encoding["attention_mask"].to(device),
           return_dict=True
       ).logits
+      prediction = torch.softmax(logits, dim=1)
       predictions.append(prediction.detach().cpu())
     final_pred = torch.cat(predictions, dim=0)
     y_inten = final_pred.numpy().T
+    for i in range(len(LABEL_COLUMNS)):
+      data[LABEL_COLUMNS[i]] = y_inten[i].tolist()
     return data
 @torch.no_grad()
         encoding["attention_mask"].to(device),
         return_dict=True
     ).logits
+    prediction = torch.softmax(logits, dim=1)
     y_inten = prediction.flatten().cpu().numpy().T.tolist()
     return y_inten
 if __name__ == "__main__":
+  Data = pd.read_csv("assets/Kickstarter_sentence_level_5000.csv")
   Data = Data[:20]
   device = torch.device('cpu')

app.py CHANGED Viewed

@@ -23,34 +23,34 @@ device = torch.device('cpu')
 manager = model_factory("./models", device)
-def single_sentence(sentence, model_select):
     df = []
-    for model_name in model_select:
-        dct = manager[model_name]
-        model, tokenizer = dct['model'], dct['tokenizer']
-        predictions = predict_single(sentence, tokenizer, model, device)
-        df.append([model_name] + predictions)
     return df
-def csv_process(csv_file, model_select, attr="content"):
     current_time = datetime.now()
     formatted_time = current_time.strftime("%Y_%m_%d_%H_%M_%S")
-    df = pd.read_csv(csv_file.name)
     os.makedirs('output', exist_ok=True)
     outputs = []
-    for model_name in model_select:
-        data = df.copy(deep=True)
-        dct = manager[model_name]
-        model, tokenizer = dct['model'], dct['tokenizer']
-        predictions = predict_csv(data, attr, tokenizer, model, device)
-        output_path = f"output/prediction_{model_name}_{formatted_time}.csv"
-        predictions.to_csv(output_path)
-        outputs.append(output_path)
     return outputs
 my_theme = gr.Theme.from_hub("JohnSmith9982/small_and_pretty")
-with gr.Blocks(theme=my_theme, title='XXX') as demo:
     gr.HTML(
         """
         <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
@@ -72,24 +72,23 @@ with gr.Blocks(theme=my_theme, title='XXX') as demo:
         with gr.Row():
             tbox_input = gr.Textbox(label="Input",
                                     info="Please input a sentence here:")
-            model_select = gr.CheckboxGroup(manager.keys(),
-                                            label="Models:",
-                                            info="Selecting different model variants to obtain aggregated predictions.")
         tab_output = gr.DataFrame(label='Probability Predictions:',
-                                  headers=["model"] + LABEL_COLUMNS,
-                                  datatype=["str"] * (len(LABEL_COLUMNS)+1),
-                                  interactive=False,
-                                  wrap=True)
         with gr.Row():
             button_ss = gr.Button("Submit", variant="primary")
-            button_ss.click(fn=single_sentence, inputs=[tbox_input, model_select], outputs=[tab_output])
             gr.ClearButton([tbox_input, tab_output])
-        gr.Markdown("## Examples")
         gr.Examples(
             examples=examples,
             inputs=tbox_input,
-            examples_per_page=5
         )
     with gr.Tab("Csv File"):
@@ -100,20 +99,18 @@ with gr.Blocks(theme=my_theme, title='XXX') as demo:
                                 )
             csv_output = gr.File(label="Predictions:")
-        model_select = gr.CheckboxGroup(manager.keys(),
-                label="Models:",
-                info="Selecting different model variants to obtain aggregated predictions.")
         with gr.Row():
             button = gr.Button("Submit", variant="primary")
-            button.click(fn=csv_process, inputs=[csv_input, model_select], outputs=[csv_output])
             gr.ClearButton([csv_input, csv_output])
-        gr.Markdown("## Examples")
-        gr.Examples(
-            examples=["assets/csv_examples.csv",],
-            inputs=csv_input
-        )
     with gr.Tab("Readme"):
         gr.Markdown(

 manager = model_factory("./models", device)
+def single_sentence(sentence):
     df = []
+    model_name = 'All_Data'
+    dct = manager[model_name]
+    model, tokenizer = dct['model'], dct['tokenizer']
+    predictions = predict_single(sentence, tokenizer, model, device)
+    df.append([model_name] + predictions)
     return df
+def csv_process(csv_file, attr="content"):
     current_time = datetime.now()
     formatted_time = current_time.strftime("%Y_%m_%d_%H_%M_%S")
+    data = pd.read_csv(csv_file.name)
+    data = data.reset_index()
     os.makedirs('output', exist_ok=True)
     outputs = []
+    model_name = 'All_Data'
+    dct = manager[model_name]
+    model, tokenizer = dct['model'], dct['tokenizer']
+    predictions = predict_csv(data, attr, tokenizer, model, device)
+    output_path = f"output/prediction_{model_name}_{formatted_time}.csv"
+    predictions.to_csv(output_path)
+    outputs.append(output_path)
     return outputs
 my_theme = gr.Theme.from_hub("JohnSmith9982/small_and_pretty")
+with gr.Blocks(theme=my_theme, title='Murphy') as demo:
     gr.HTML(
         """
         <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
         with gr.Row():
             tbox_input = gr.Textbox(label="Input",
                                     info="Please input a sentence here:")
+            gr.Markdown("""
+                # Detailed Information About our Model
+                ...
+                """)
         tab_output = gr.DataFrame(label='Probability Predictions:',
+                                  headers=LABEL_COLUMNS,
+                                  datatype=["str"] * (len(LABEL_COLUMNS)),
+                                  interactive=False)
         with gr.Row():
             button_ss = gr.Button("Submit", variant="primary")
+            button_ss.click(fn=single_sentence, inputs=[tbox_input], outputs=[tab_output])
             gr.ClearButton([tbox_input, tab_output])
         gr.Examples(
             examples=examples,
             inputs=tbox_input,
+            examples_per_page=len(examples)
         )
     with gr.Tab("Csv File"):
                                 )
             csv_output = gr.File(label="Predictions:")
         with gr.Row():
             button = gr.Button("Submit", variant="primary")
+            button.click(fn=csv_process, inputs=[csv_input], outputs=[csv_output])
             gr.ClearButton([csv_input, csv_output])
+        gr.Markdown("## Examples \n The incoming CSV must include the ``content`` field, which represents the text that needs to be predicted!")
+        gr.DataFrame(label='Csv input format:',
+                    value=[[i, examples[i]] for i in range(len(examples))],
+                    headers=["index", "content"],
+                    datatype=["number","str"],
+                    interactive=False
+                    )
     with gr.Tab("Readme"):
         gr.Markdown(

assets/csv_examples.csv DELETED Viewed

@@ -1,30 +0,0 @@
-,index,content,word_count
-0,225644,The first prototype did not clip together well and had strength issues so we redesigned it with new sides and a different tabs structure.,24
-1,989071,Maybe you own a shop or perhaps you and your friends want to go in on this together to save some money.,22
-2,332310,"With this campaign we want to propose ""Eternity Dice Regular and Charms Edition"", sculpted by hand in stone, with a polished finish and highly accurate details.",26
-3,101474,"It's hand cut from a thick and reliable high quality calf skin, which is soft and flexible enough for wearing with utmost comfort.",23
-4,1641986,"a#  by  5       WHAT SEPARATES US FROM THE COMPETITION     a lax-ll 360 AUDIO FLOATABLE Full submergable up to Superior surround sound Counter balanced for optimal 1 meter for 30 minutes audio direction while floating     WIRELESS SPECIFICATIONS MATERIALS  sarr of whreless Small and compact, with Engineered to perfection streaming range enormous sound with the highest quality  materials avalable     PRICE-POINT WARRANTY BVURABILITY  Affordable technology Cone yearlimited warranty | Rubberized shock absorbing cover     PATENTS BUILT-IN MIC BATTERY LIFE  Patent.Pending stabalization .",78
-5,1632938,Much of the known world is either from this culture or has converted to the faith.,16
-6,1141502,"The more I play it, the more I want to play it.",12
-7,1424712,"There are weapons all around you, you just never thought about your household goods that way.",16
-8,460625,"In September, I'm going down to Virginia with a bunch of my music buddies to record the album.",18
-9,179267,"It is suitable for use with Cthulhu, Horror, Space and Dungeon - style miniature games.",15
-10,1092530,Games of the imagination teach us actions have consequences in a realm that can be reset.,16
-11,1050585,"Intense cleaning of the existing space, brick repairs, and removal of unneeded materials is also necessary.",16
-12,1126342,These will include color artwork and fully designed stats to help you build exciting and unique Shadowlands encounters.,18
-13,277427,"If you're leaving the backpack unattended, the bag itself can be secured to almost any fixed object using the integrated steel wire and combination lock, making it impossible for opportunistic thieves to access your belongings or steal the bag, without special cutting equipment.",43
-14,307425,Their parents had recruited the police and even had the church issuing official statements forbidding the girls to walk through monastery doors.,22
-15,611566,is a childrenâs book for elementary school age kids with illustrations appealing to people of all ages.,17
-16,951173,"Thanks to you we reached our original goal, so we got festival fees and insurance covered.",16
-17,1294624,"Â It's been really well-received, and recently won an online award for Best New Tabletop Sports Game of 2013.",19
-18,686912,"But New Jersey farmers are retiring and all over the state, development continues to push out dwindling farmland.",18
-19,1291430,"Support Cards for easily setting initiative and keeping track of hit points, ammo, etc, speeding things up and eliminating the need for any writing/erasing Deep character creation with options designed for interesting roleplaying, and super fast to create (5 minutes or less) Specially laminated Character Cards take the place of the old character sheet, making information extremely easy to find and removing clutter from the gaming table Easily expandable without having to purchase and read through lengthy new books - newÂ equipment, weapons, powers, skills, and opponents can be instantly added to your game with Setting Cards All special rules for equipment, weapons, powers, skills, and opponents printed on cards kept in player hands, so you never have to go searching for them Completely genre neutral, so assets from any setting are completely compatible with any others, making your game infinitely expandable and customizable Tech-themed Resolution Deck Concept Built from the ground up with VTTs (Virtual Table Tops) in mind, with all digital assets ready to drop into your game to integrate seamlessly with groups who play remotely Complete playable module with starter adventure included in backer rewards of $10 or more!",192
-20,1656635,"Their bond of friendship makes the journey more important than the destination as they share their dreams, frustrations and fears.Â The story goes on to show the dramatic impact this innocent childhood adventure has on their young adult lives.",39
-21,1679298,"He also is the Head Designer of The Design Trust so-to-speak, besides his regular job ...",16
-22,337389,"This year, the film team has plans to produce a short comedy, based on a true story set in the city of Jerusalem.",23
-23,980529,"$12,000 - Roguelike Player Mat This player mat will include extra rules to play Baldrick's Tomb as a solo player Roguelike.",21
-24,1700094,_ Thank you for viewing the project!,7
-25,420192,We appreciate your support and thank you for joining us in helping cause this mission stay in action.,18
-26,1469419,It'll even be foil-wrapped like baseball cards!,7
-27,105008,We believe that the major players with their massive branding campaigns together with the margins applied by distributors and retailers are a business model that doesnât deliver a fair value to customers.,32
-28,1505209,"If you want to take advantage of the Rhino Slider's versatility, you'll have an option to add extra sets of rails after the campaign ends.",25

assets/example.csv ADDED Viewed

	@@ -0,0 +1,15 @@

+content,
+What are some of your favorite jokes? Let us know!,cov
+Is anyone being creative with their snow day? ,cov
+Did you see our latest movie?,cov
+Come hang out with us! ,cov
+Hey beautiful people! What would you like to see us doing more (or less) of !,cov
+Ends tonight! Shop select certifiably comfortable shoes!,Assertive
+Just Do it! ,Assertive
+Don't miss our products !,Assertive
+"In fact, we discovered that Woollip works better that what we imagined.",Infor
+"It is made of Titanium Grade 5, a material famous for being very strong yet very light.",Infor
+Each game already comes with six characters.,Infor
+We thank you personally for the trust you are putting in us and our company.,Emo
+I wear it everyday and am very happy with it!,Emo
+We are so grateful for our everyday heroes who never cease to amaze us!,Emo

assets/examples.txt CHANGED Viewed

@@ -1,14 +1,14 @@
-Games of the imagination teach us actions have consequences in a realm that can be reset.
-Intense cleaning of the existing space, brick repairs, and removal of unneeded materials is also necessary.
-Thanks to you we reached our original goal, so we got festival fees and insurance covered.
-Â It's been really well-received, and recently won an online award for Best New Tabletop Sports Game of 2013.
-But New Jersey farmers are retiring and all over the state, development continues to push out dwindling farmland.
-Our chemical-free process provides unmatched comfort.
-However, this chart does not factor in special ability influence since that varies with the ability being used.
-I'd like to do something similar with pictures.
-This means you can feel more than comfortable putting them in your back pocket or purse.
-She holds a degree from the Advertising University of Madrid.
-Skeleton Birds are heading to Groovebox Studios on March 17th to record and film a live GBS Detroit EP and video.
-Please help support us & make this awesome case a reality!
-So... We're asking for $3,000 per song.
-You also have battle items and action cards to defeat your gnome enemies.

+What are some of your favorite jokes? Let us know!
+Is anyone being creative with their snow day?
+Did you see our latest movie?
+Come hang out with us!
+Hey beautiful people! What would you like to see us doing more (or less) of !
+Ends tonight! Shop select certifiably comfortable shoes!
+Just Do it!
+Don't miss our products !
+In fact, we discovered that Woollip works better that what we imagined.
+It is made of Titanium Grade 5, a material famous for being very strong yet very light.
+Each game already comes with six characters.
+We thank you personally for the trust you are putting in us and our company.
+I wear it everyday and am very happy with it!
+We are so grateful for our everyday heroes who never cease to amaze us!

convert.py CHANGED Viewed

@@ -3,10 +3,12 @@ import glob
 import os
 from transformers import BertTokenizerFast as BertTokenizer, BertForSequenceClassification
-LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone"]
 tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=4)
 id2label = {i:label for i,label in enumerate(LABEL_COLUMNS)}
 label2id = {label:i for i,label in enumerate(LABEL_COLUMNS)}

 import os
 from transformers import BertTokenizerFast as BertTokenizer, BertForSequenceClassification
+os.environ['https_proxy'] = "127.0.0.1:1081"
+LABEL_COLUMNS = ["Assertive Tone", "Conversational Tone", "Emotional Tone", "Informative Tone", "None"]
 tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=5)
 id2label = {i:label for i,label in enumerate(LABEL_COLUMNS)}
 label2id = {label:i for i,label in enumerate(LABEL_COLUMNS)}

models/All_Data/config.json CHANGED Viewed

@@ -13,7 +13,8 @@
     "0": "Assertive Tone",
     "1": "Conversational Tone",
     "2": "Emotional Tone",
-    "3": "Informative Tone"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
@@ -21,7 +22,8 @@
     "Assertive Tone": 0,
     "Conversational Tone": 1,
     "Emotional Tone": 2,
-    "Informative Tone": 3
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,

     "0": "Assertive Tone",
     "1": "Conversational Tone",
     "2": "Emotional Tone",
+    "3": "Informative Tone",
+    "4": "None"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
     "Assertive Tone": 0,
     "Conversational Tone": 1,
     "Emotional Tone": 2,
+    "Informative Tone": 3,
+    "None": 4
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,

models/All_Data/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4edf18d14298c9a7057bbbdbc88cddf3b673e452103c6c4b882e1cec14d51c53
-size 438021294

 version https://git-lfs.github.com/spec/v1
+oid sha256:593dc3210abcc95df5a0f63580ce571df2b60c39cc4f1d7122e371c9f37c4c64
+size 438024366

models/Facebook/config.json DELETED Viewed

@@ -1,37 +0,0 @@
-{
-  "_name_or_path": "bert-base-uncased",
-  "architectures": [
-    "BertForSequenceClassification"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "id2label": {
-    "0": "Assertive Tone",
-    "1": "Conversational Tone",
-    "2": "Emotional Tone",
-    "3": "Informative Tone"
-  },
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label2id": {
-    "Assertive Tone": 0,
-    "Conversational Tone": 1,
-    "Emotional Tone": 2,
-    "Informative Tone": 3
-  },
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
-  "transformers_version": "4.36.2",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 30522
-}

models/Facebook/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f511b8b4b91b5fa408c5b3220ce0fe9b61b2f9a3a54dd00acb3a81aa0a2a19e8
-size 438021294

models/Facebook/vocab.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

models/Kickstarter/config.json DELETED Viewed

@@ -1,37 +0,0 @@
-{
-  "_name_or_path": "bert-base-uncased",
-  "architectures": [
-    "BertForSequenceClassification"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "id2label": {
-    "0": "Assertive Tone",
-    "1": "Conversational Tone",
-    "2": "Emotional Tone",
-    "3": "Informative Tone"
-  },
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label2id": {
-    "Assertive Tone": 0,
-    "Conversational Tone": 1,
-    "Emotional Tone": 2,
-    "Informative Tone": 3
-  },
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
-  "transformers_version": "4.36.2",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 30522
-}

models/Kickstarter/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b98553cd5a9b23babc4e20ade9abda931497de3103acf09656eb39cfcbb0c485
-size 438021294

models/Kickstarter/vocab.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

models/Twitter/config.json DELETED Viewed

@@ -1,37 +0,0 @@
-{
-  "_name_or_path": "bert-base-uncased",
-  "architectures": [
-    "BertForSequenceClassification"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "id2label": {
-    "0": "Assertive Tone",
-    "1": "Conversational Tone",
-    "2": "Emotional Tone",
-    "3": "Informative Tone"
-  },
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "label2id": {
-    "Assertive Tone": 0,
-    "Conversational Tone": 1,
-    "Emotional Tone": 2,
-    "Informative Tone": 3
-  },
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
-  "transformers_version": "4.36.2",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 30522
-}

models/Twitter/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6abf83c8c66c4f3fcaba340dcab3b5b1f4f2b66381b21a5aacab086194cf0cbd
-size 438021294

models/Twitter/vocab.txt DELETED Viewed

The diff for this file is too large to render. See raw diff

tmp.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import pandas as pd
+pd.read_csv('output/example.csv')
+pd.inde
+...