Jesse Karmani committed on
Commit
a0c5750
·
1 Parent(s): 678f004

Remove default __main__ method

Browse files
Files changed (1) hide show
  1. app.py +197 -197
app.py CHANGED
@@ -362,203 +362,203 @@ def main(
362
  print(f"Translation done.\n")
363
 
364
 
365
- if __name__ == "__main__":
366
- parser = argparse.ArgumentParser(description="Run the translation experiments")
367
- input_group = parser.add_mutually_exclusive_group(required=True)
368
- input_group.add_argument(
369
- "--sentences_path",
370
- default=None,
371
- type=str,
372
- help="Path to a txt file containing the sentences to translate. One sentence per line.",
373
- )
374
-
375
- input_group.add_argument(
376
- "--sentences_dir",
377
- type=str,
378
- default=None,
379
- help="Path to a directory containing the sentences to translate. "
380
- "Sentences must be in .txt files containing containing one sentence per line.",
381
- )
382
-
383
- parser.add_argument(
384
- "--files_extension",
385
- type=str,
386
- default="txt",
387
- help="If sentences_dir is specified, extension of the files to translate. Defaults to txt. "
388
- "If set to an empty string, we will translate all files in the directory.",
389
- )
390
-
391
- parser.add_argument(
392
- "--output_path",
393
- type=str,
394
- required=True,
395
- help="Path to a txt file where the translated sentences will be written. If the input is a directory, "
396
- "the output will be a directory with the same structure.",
397
- )
398
-
399
- parser.add_argument(
400
- "--source_lang",
401
- type=str,
402
- default=None,
403
- required=False,
404
- help="Source language id. See: supported_languages.md. Required for m2m100 and nllb200",
405
- )
406
-
407
- parser.add_argument(
408
- "--target_lang",
409
- type=str,
410
- default=None,
411
- required=False,
412
- help="Source language id. See: supported_languages.md. Required for m2m100 and nllb200",
413
- )
414
-
415
- parser.add_argument(
416
- "--starting_batch_size",
417
- type=int,
418
- default=128,
419
- help="Starting batch size, we will automatically reduce it if we find an OOM error."
420
- "If you use multiple devices, we will divide this number by the number of devices.",
421
- )
422
-
423
- parser.add_argument(
424
- "--model_name",
425
- type=str,
426
- default="facebook/m2m100_1.2B",
427
- help="Path to the model to use. See: https://huggingface.co/models",
428
- )
429
-
430
- parser.add_argument(
431
- "--lora_weights_name_or_path",
432
- type=str,
433
- default=None,
434
- help="If the model uses LoRA weights, path to those weights. See: https://github.com/huggingface/peft",
435
- )
436
-
437
- parser.add_argument(
438
- "--force_auto_device_map",
439
- action="store_true",
440
- help=" Whether to force the use of the auto device map. If set to True, "
441
- "the model will be split across GPUs and CPU to fit the model in memory. "
442
- "If set to False, a full copy of the model will be loaded into each GPU. Defaults to False.",
443
- )
444
-
445
- parser.add_argument(
446
- "--max_length",
447
- type=int,
448
- default=256,
449
- help="Maximum number of tokens in the source sentence and generated sentence. "
450
- "Increase this value to translate longer sentences, at the cost of increasing memory usage.",
451
- )
452
-
453
- parser.add_argument(
454
- "--num_beams",
455
- type=int,
456
- default=5,
457
- help="Number of beams for beam search, m2m10 author recommends 5, but it might use too much memory",
458
- )
459
-
460
- parser.add_argument(
461
- "--num_return_sequences",
462
- type=int,
463
- default=1,
464
- help="Number of possible translation to return for each sentence (num_return_sequences<=num_beams).",
465
- )
466
-
467
- parser.add_argument(
468
- "--precision",
469
- type=str,
470
- default=None,
471
- choices=["bf16", "fp16", "32", "4", "8"],
472
- help="Precision of the model. bf16, fp16 or 32, 8 , 4 "
473
- "(4bits/8bits quantification, requires bitsandbytes library: https://github.com/TimDettmers/bitsandbytes). "
474
- "If None, we will use the torch.dtype of the model weights.",
475
- )
476
-
477
- parser.add_argument(
478
- "--do_sample",
479
- action="store_true",
480
- help="Use sampling instead of beam search.",
481
- )
482
-
483
- parser.add_argument(
484
- "--temperature",
485
- type=float,
486
- default=0.8,
487
- help="Temperature for sampling, value used only if do_sample is True.",
488
- )
489
-
490
- parser.add_argument(
491
- "--top_k",
492
- type=int,
493
- default=100,
494
- help="If do_sample is True, will sample from the top k most likely tokens.",
495
- )
496
-
497
- parser.add_argument(
498
- "--top_p",
499
- type=float,
500
- default=0.75,
501
- help="If do_sample is True, will sample from the top k most likely tokens.",
502
- )
503
-
504
- parser.add_argument(
505
- "--keep_special_tokens",
506
- action="store_true",
507
- help="Keep special tokens in the decoded text.",
508
- )
509
-
510
- parser.add_argument(
511
- "--keep_tokenization_spaces",
512
- action="store_true",
513
- help="Do not clean spaces in the decoded text.",
514
- )
515
-
516
- parser.add_argument(
517
- "--repetition_penalty",
518
- type=float,
519
- default=None,
520
- help="Repetition penalty.",
521
- )
522
-
523
- parser.add_argument(
524
- "--prompt",
525
- type=str,
526
- default=None,
527
- help="Prompt to use for generation. "
528
- "It must include the special token %%SENTENCE%% which will be replaced by the sentence to translate.",
529
- )
530
-
531
- parser.add_argument(
532
- "--trust_remote_code",
533
- action="store_true",
534
- help="If set we will trust remote code in HuggingFace models. This is required for some models.",
535
- )
536
-
537
- args = parser.parse_args()
538
-
539
- main(
540
- sentences_path=args.sentences_path,
541
- sentences_dir=args.sentences_dir,
542
- files_extension=args.files_extension,
543
- output_path=args.output_path,
544
- source_lang=args.source_lang,
545
- target_lang=args.target_lang,
546
- starting_batch_size=args.starting_batch_size,
547
- model_name=args.model_name,
548
- max_length=args.max_length,
549
- num_beams=args.num_beams,
550
- num_return_sequences=args.num_return_sequences,
551
- precision=args.precision,
552
- do_sample=args.do_sample,
553
- temperature=args.temperature,
554
- top_k=args.top_k,
555
- top_p=args.top_p,
556
- keep_special_tokens=args.keep_special_tokens,
557
- keep_tokenization_spaces=args.keep_tokenization_spaces,
558
- repetition_penalty=args.repetition_penalty,
559
- prompt=args.prompt,
560
- trust_remote_code=args.trust_remote_code,
561
- )
562
 
563
  demo = gradio.Interface(fn=main, inputs="textbox", outputs="textbox")
564
  demo.launch(share=True)
 
362
  print(f"Translation done.\n")
363
 
364
 
365
+ # if __name__ == "__main__":
366
+ # parser = argparse.ArgumentParser(description="Run the translation experiments")
367
+ # input_group = parser.add_mutually_exclusive_group(required=True)
368
+ # input_group.add_argument(
369
+ # "--sentences_path",
370
+ # default=None,
371
+ # type=str,
372
+ # help="Path to a txt file containing the sentences to translate. One sentence per line.",
373
+ # )
374
+
375
+ # input_group.add_argument(
376
+ # "--sentences_dir",
377
+ # type=str,
378
+ # default=None,
379
+ # help="Path to a directory containing the sentences to translate. "
380
+ # "Sentences must be in .txt files containing containing one sentence per line.",
381
+ # )
382
+
383
+ # parser.add_argument(
384
+ # "--files_extension",
385
+ # type=str,
386
+ # default="txt",
387
+ # help="If sentences_dir is specified, extension of the files to translate. Defaults to txt. "
388
+ # "If set to an empty string, we will translate all files in the directory.",
389
+ # )
390
+
391
+ # parser.add_argument(
392
+ # "--output_path",
393
+ # type=str,
394
+ # required=True,
395
+ # help="Path to a txt file where the translated sentences will be written. If the input is a directory, "
396
+ # "the output will be a directory with the same structure.",
397
+ # )
398
+
399
+ # parser.add_argument(
400
+ # "--source_lang",
401
+ # type=str,
402
+ # default=None,
403
+ # required=False,
404
+ # help="Source language id. See: supported_languages.md. Required for m2m100 and nllb200",
405
+ # )
406
+
407
+ # parser.add_argument(
408
+ # "--target_lang",
409
+ # type=str,
410
+ # default=None,
411
+ # required=False,
412
+ # help="Source language id. See: supported_languages.md. Required for m2m100 and nllb200",
413
+ # )
414
+
415
+ # parser.add_argument(
416
+ # "--starting_batch_size",
417
+ # type=int,
418
+ # default=128,
419
+ # help="Starting batch size, we will automatically reduce it if we find an OOM error."
420
+ # "If you use multiple devices, we will divide this number by the number of devices.",
421
+ # )
422
+
423
+ # parser.add_argument(
424
+ # "--model_name",
425
+ # type=str,
426
+ # default="facebook/m2m100_1.2B",
427
+ # help="Path to the model to use. See: https://huggingface.co/models",
428
+ # )
429
+
430
+ # parser.add_argument(
431
+ # "--lora_weights_name_or_path",
432
+ # type=str,
433
+ # default=None,
434
+ # help="If the model uses LoRA weights, path to those weights. See: https://github.com/huggingface/peft",
435
+ # )
436
+
437
+ # parser.add_argument(
438
+ # "--force_auto_device_map",
439
+ # action="store_true",
440
+ # help=" Whether to force the use of the auto device map. If set to True, "
441
+ # "the model will be split across GPUs and CPU to fit the model in memory. "
442
+ # "If set to False, a full copy of the model will be loaded into each GPU. Defaults to False.",
443
+ # )
444
+
445
+ # parser.add_argument(
446
+ # "--max_length",
447
+ # type=int,
448
+ # default=256,
449
+ # help="Maximum number of tokens in the source sentence and generated sentence. "
450
+ # "Increase this value to translate longer sentences, at the cost of increasing memory usage.",
451
+ # )
452
+
453
+ # parser.add_argument(
454
+ # "--num_beams",
455
+ # type=int,
456
+ # default=5,
457
+ # help="Number of beams for beam search, m2m10 author recommends 5, but it might use too much memory",
458
+ # )
459
+
460
+ # parser.add_argument(
461
+ # "--num_return_sequences",
462
+ # type=int,
463
+ # default=1,
464
+ # help="Number of possible translation to return for each sentence (num_return_sequences<=num_beams).",
465
+ # )
466
+
467
+ # parser.add_argument(
468
+ # "--precision",
469
+ # type=str,
470
+ # default=None,
471
+ # choices=["bf16", "fp16", "32", "4", "8"],
472
+ # help="Precision of the model. bf16, fp16 or 32, 8 , 4 "
473
+ # "(4bits/8bits quantification, requires bitsandbytes library: https://github.com/TimDettmers/bitsandbytes). "
474
+ # "If None, we will use the torch.dtype of the model weights.",
475
+ # )
476
+
477
+ # parser.add_argument(
478
+ # "--do_sample",
479
+ # action="store_true",
480
+ # help="Use sampling instead of beam search.",
481
+ # )
482
+
483
+ # parser.add_argument(
484
+ # "--temperature",
485
+ # type=float,
486
+ # default=0.8,
487
+ # help="Temperature for sampling, value used only if do_sample is True.",
488
+ # )
489
+
490
+ # parser.add_argument(
491
+ # "--top_k",
492
+ # type=int,
493
+ # default=100,
494
+ # help="If do_sample is True, will sample from the top k most likely tokens.",
495
+ # )
496
+
497
+ # parser.add_argument(
498
+ # "--top_p",
499
+ # type=float,
500
+ # default=0.75,
501
+ # help="If do_sample is True, will sample from the top k most likely tokens.",
502
+ # )
503
+
504
+ # parser.add_argument(
505
+ # "--keep_special_tokens",
506
+ # action="store_true",
507
+ # help="Keep special tokens in the decoded text.",
508
+ # )
509
+
510
+ # parser.add_argument(
511
+ # "--keep_tokenization_spaces",
512
+ # action="store_true",
513
+ # help="Do not clean spaces in the decoded text.",
514
+ # )
515
+
516
+ # parser.add_argument(
517
+ # "--repetition_penalty",
518
+ # type=float,
519
+ # default=None,
520
+ # help="Repetition penalty.",
521
+ # )
522
+
523
+ # parser.add_argument(
524
+ # "--prompt",
525
+ # type=str,
526
+ # default=None,
527
+ # help="Prompt to use for generation. "
528
+ # "It must include the special token %%SENTENCE%% which will be replaced by the sentence to translate.",
529
+ # )
530
+
531
+ # parser.add_argument(
532
+ # "--trust_remote_code",
533
+ # action="store_true",
534
+ # help="If set we will trust remote code in HuggingFace models. This is required for some models.",
535
+ # )
536
+
537
+ # args = parser.parse_args()
538
+
539
+ # main(
540
+ # sentences_path=args.sentences_path,
541
+ # sentences_dir=args.sentences_dir,
542
+ # files_extension=args.files_extension,
543
+ # output_path=args.output_path,
544
+ # source_lang=args.source_lang,
545
+ # target_lang=args.target_lang,
546
+ # starting_batch_size=args.starting_batch_size,
547
+ # model_name=args.model_name,
548
+ # max_length=args.max_length,
549
+ # num_beams=args.num_beams,
550
+ # num_return_sequences=args.num_return_sequences,
551
+ # precision=args.precision,
552
+ # do_sample=args.do_sample,
553
+ # temperature=args.temperature,
554
+ # top_k=args.top_k,
555
+ # top_p=args.top_p,
556
+ # keep_special_tokens=args.keep_special_tokens,
557
+ # keep_tokenization_spaces=args.keep_tokenization_spaces,
558
+ # repetition_penalty=args.repetition_penalty,
559
+ # prompt=args.prompt,
560
+ # trust_remote_code=args.trust_remote_code,
561
+ # )
562
 
563
  demo = gradio.Interface(fn=main, inputs="textbox", outputs="textbox")
564
  demo.launch(share=True)