File size: 364 Bytes
42bcb30
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# -*- coding: utf-8 -*-
from pysbd.abbreviation_replacer import AbbreviationReplacer
from pysbd.lang.common import Common, Standard

class Urdu(Common, Standard):
    """Urdu language profile.

    Inherits from ``Common`` and ``Standard`` and overrides the sentence
    boundary pattern and the punctuation list with Urdu-specific values.
    """

    # Language identifier for this profile.
    iso_code = "ur"

    # First alternative: the shortest run of characters that ends in one of
    # the terminators (Arabic full stop, Arabic question mark, '!' or '?').
    # Second alternative: a lazy match up to the next `$` anchor, which picks
    # up trailing text that has no terminator.
    SENTENCE_BOUNDARY_REGEX = r".*?[۔؟!\?]|.*?$"

    # Terminator characters: the two Latin marks first, then the two
    # Arabic-script marks.
    Punctuations = [
        "?",
        "!",
        "۔",
        "؟",
    ]

    class AbbreviationReplacer(AbbreviationReplacer):
        """Abbreviation replacer subclass with an empty sentence-starter list."""

        # No sentence starters are configured for this language.
        SENTENCE_STARTERS = []