File size: 885 Bytes
9f1c059
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/tcsh -f
# Insert a comma token (" ,/RD_PUNC ") after words of a tagged text whose tag
# is listed in a tag file, then pass each processed line to ./scripts/detagging.
#
# Arguments:
#   $1  tagged text; each line is read as space-separated tokens, and the tag
#       of a token is taken to be its second "/"-separated field
#   $2  list of tags that mark a break point (matched as whole words)
#
# Side effects: uses the scratch files "plain_line" and "break_points" in the
# current directory. "break_points" is removed after every line; "plain_line"
# is left holding the last processed line.
if ($# != 2) then
	echo "argument 1 = tagged_text"
	echo "argument 2 = pos list"
	exit(-1)
endif

# Fail early with a clear message instead of looping over nothing.
if (! -r "$1") then
	echo "cannot read tagged_text: $1"
	exit(-1)
endif
if (! -r "$2") then
	echo "cannot read pos list: $2"
	exit(-1)
endif

# Tokens such as "?/RD_PUNC" or "*/SYM" must not be filename-expanded when
# they pass through `set` and `echo`.
set noglob

# awk counts records, so a final line without a trailing newline is included
# (wc -l would skip it).
set lines_text = `awk 'END { print NR }' "$1"`
set k = 1

while ($k <= $lines_text)
	# Start from an empty break_points file: nothing left over from an
	# aborted earlier run is picked up, and the file always exists below.
	cp /dev/null break_points

	# Fetch line k; assigning through `set` and echoing it back collapses
	# runs of whitespace to single spaces, which the cut calls rely on.
	set plain_line = `sed -n "${k}p" "$1"`
	echo $plain_line > plain_line

	set num_words = `wc -w < plain_line`
	set i = 1
	while ($i <= $num_words)
		set word = `cut -d " " -f$i plain_line`
		set tag = `cut -d " " -f$i plain_line | cut -d "/" -f2`

		# An empty tag (token ending in "/") can never be a break point;
		# skipping it also keeps grep from being called without a pattern.
		set chk_tag = 0
		if ("$tag" != "") then
			# -F: the tag is literal text, not a regex; --: a tag starting
			# with "-" is not parsed as an option.
			set chk_tag = `grep -w -F -- "$tag" "$2" | wc -l`
		endif

		if ($chk_tag != 0) then
			echo $word >> break_points
		endif
		@ i++
	end

	set breaks = `wc -l < break_points`

	set j = 1
	while ($j <= $breaks)
		set old = `sed -n "${j}p" break_points`

		# NOTE(review): $old is used as a sed regex and only its first match
		# on the line is rewritten, so repeated tokens or tokens containing
		# regex metacharacters may be handled imprecisely.
		sed -i "s@$old@$old ,/RD_PUNC @" plain_line
		@ j++
	end
	sed -i 's/  / /g' plain_line
	./scripts/detagging plain_line
	rm -f break_points
	@ k++
end