1 files changed, 304 insertions, 0 deletions
diff --git a/JLanguageTool/src/resource/en/remap.awk b/JLanguageTool/src/resource/en/remap.awk
new file mode 100644
index 0000000..0e795c3
--- /dev/null
+++ b/JLanguageTool/src/resource/en/remap.awk
@@ -0,0 +1,304 @@
+# Set up map[]: single-letter word-class code from the input list -> tag that
+# the rules below print. An empty value means the code gets no tag of its
+# own. (The "Moby" remark on code "o" is kept from the original notes.)
+BEGIN {
+	# FS=":|\|" -- a field-separator override that is left disabled here.
+
+	# Noun codes
+	map["N"] = "NN"		# Noun
+	map["p"] = "NNS"	# Plural
+	map["h"] = ""		# Noun Phrase -- delete this in FSA
+
+	# Verb codes
+	map["V"] = "VBG"	# Verb (usu participle)
+	map["t"] = "VB"		# Verb (transitive)
+	map["i"] = "VB"		# Verb (intransitive) -- no difference here! use a new tag?
+
+	# Modifier codes
+	map["A"] = "JJ"		# Adjective
+	map["v"] = "RB"		# Adverb
+
+	# Function-word and interjection codes
+	map["C"] = "CC"		# Conjunction
+	map["P"] = "IN"		# Preposition
+	map["!"] = "UH"		# Interjection
+	map["r"] = "PRP"	# Pronoun
+	map["D"] = "DT"		# Definite Article
+	map["I"] = "DT"		# Indefinite Article
+	map["o"] = ""		# Nominative -- no such tags in Moby?
+
+	# Technical marks: unusable, so they map to nothing. The keys are spelled
+	# without a backslash: "\|" and "\?" are not defined escape sequences in
+	# awk string constants, so what key they produce is left to the awk used.
+	map["|"] = ""
+	map["?"] = ""
+}
+#additional rules: JJ & "er"_END_OF_WORD=JJR
+#additional rules: JJ & "est"_END_OF_WORD=JJS
+#NN & Uppercase = NNP
+#NNS & Uppercase = NNPS
+#verbs: case of 5 fields
+#give V: gave | given | giving | gives
+# VBP     VBD    VBN     VBG      VBZ
+#in case of 4 fields: VBD=VBN.
+#PRP$, WDT, WP, WP$, WRB only by enumeration
+#MD by enumeration
+
+# Drop entries whose headword is a single letter: the record is blanked, so the rules below find nothing to match.
+$0 ~ /^[a-zA-Z][\t ]/ { $0 = "" }
+
+# Adjective entries ("A:" or "A?:"), skipping records that contain an apostrophe.
+# Prints the headword as JJ (or RB when it ends in "ly", "early" excepted), then
+# the forms following the first ":" (up to the first "|") as JJR and the forms of
+# the next "|" segment as JJS. JJR[]/JJS[] keep form -> headword for later rules.
+/A:|A\?:/ && !/'/ {
+	# Strip {...} annotations, commas and digits before splitting the record.
+	gsub(/\{[a-zA-Z:0-9_]+\}/, "")
+	gsub(/,/, "")
+	gsub(/[0-9]/, "")
+	split($0, adjective, /\|/)
+	split(adjective[1], jjr, ":")
+	# Keep split()'s counts: "for (n in array)" visits elements in an
+	# unspecified order, so an index loop keeps the output order stable.
+	jjr_count = split(jjr[2], jjr_forms)
+	jjs_count = split(adjective[2], jjs_forms)
+	gsub(/\|/, "")	# later rules see the record without "|"
+	if ($1"_END" !~ /ly_END/ || $1"_END" ~ /early_END/)
+		print $1 "\t" $1 "\tJJ"
+	else
+		print $1 "\t" $1 "\tRB"
+	for (k = 1; k <= jjr_count; k++)
+		if (jjr_forms[k] !~ /\?|<|\.|!/) {	# skip forms carrying marker characters
+			print jjr_forms[k] "\t" $1 "\tJJR"
+			JJR[jjr_forms[k]] = $1
+		}
+	for (k = 1; k <= jjs_count; k++)
+		if (jjs_forms[k] !~ /\?|<|\.|!/) {
+			print jjs_forms[k] "\t" $1 "\tJJS"
+			JJS[jjs_forms[k]] = $1
+		}
+}
+
+# Noun entries ("N:" or "N?:") without an apostrophe. A three-field record whose
+# headword contains "less" and whose second field holds "N?:" is printed as JJ only;
+# otherwise: NN/NNP for the headword, NNS/NNPS for fields 3..NF (nns[] keeps the lower-case ones).
+/N:|N\?:/ && !/'/ {
+	if (!($1 ~ /less/ && $2 ~ /N\?:/ && NF == 3)) {
+		print $1 "\t" $1 "\t" ($1 !~ /[A-Z]/ ? "NN" : "NNP")
+		gsub(/[,<]/, "")
+		gsub(/{[a-zA-Z:0-9_]+}/, "")
+		gsub(/[0-9]\.[0-9]/, "")
+		gsub(/[0-9]+ /, "")
+		for (f = 3; f <= NF; f++) {
+			if ($f ~ /[\?\~\!]/ || "PFX"$f"SFX" ~ /PFX([0-9]+|\|)SFX/)
+				continue	# marked form, bare number or lone "|"
+			if ($f ~ /[A-Z]/)
+				print $f "\t" $1 "\tNNPS"
+			else {
+				print $f "\t" $1 "\tNNS"
+				nns[$f] = $1
+			}
+		}
+	} else print $1 "\t" $1 "\tJJ"
+}
+
+# emit_forms(list, lemma, tag [, tag2])
+#   list   whitespace-separated word forms taken from one segment of a record
+#   lemma  headword, printed in the second column of every line
+#   tag    tag printed for each accepted form
+#   tag2   optional second tag; when given, each form is printed again with it
+# Forms that contain ?, ~, ! or <, or that are only digits or a lone "|", are
+# skipped. forms, count and k are local variables.
+function emit_forms(list, lemma, tag, tag2,    forms, count, k) {
+	# Walk the forms by index: "for (n in forms)" has no defined order, which
+	# would make the order of the printed lines depend on the awk in use.
+	count = split(list, forms, " ")
+	for (k = 1; k <= count; k++) {
+		if (forms[k] ~ /[\?\~\!<]/ || "PFX"forms[k]"SFX" ~ /PFX([0-9]+|\|)SFX/)
+			continue
+		print forms[k] "\t" lemma "\t" tag
+		if (tag2 != "")
+			print forms[k] "\t" lemma "\t" tag2
+	}
+}
+
+# Verb entries ("V:") in records without an apostrophe. Example from the notes
+# above the rules:  give V: gave | given | giving | gives
+# The record is split on "|" and ":"; segment 1 holds the headword, which is
+# printed as VB and VBP. The other segments are tagged by position:
+#   segment 5 present:  2 = VBD, 3 = VBN, 4 = VBG, 5 = VBZ
+#   segment 5 empty:    2 = VBD and VBN, 3 = VBG, 4 = VBZ
+/V:/ && !/'/ {
+	# Remove commas, {...} annotations and numbers before splitting.
+	gsub(/,/, "")
+	gsub(/\{[a-zA-Z:0-9_]+\}/, "")
+	gsub(/[0-9]\.[0-9]/, "")
+	gsub(/[0-9]+ /, "")
+	split($0, verb_fields, /\||:/)
+	print $1 "\t" $1 "\tVB"
+	print $1 "\t" $1 "\tVBP"
+	# Without a fifth segment the forms of segment 2 are printed as VBD and VBN.
+	if (verb_fields[5] == "") {
+		emit_forms(verb_fields[2], $1, "VBD", "VBN")
+		emit_forms(verb_fields[3], $1, "VBG")
+		emit_forms(verb_fields[4], $1, "VBZ")
+	} else {
+		emit_forms(verb_fields[2], $1, "VBD")
+		emit_forms(verb_fields[3], $1, "VBN")
+		emit_forms(verb_fields[4], $1, "VBG")
+		emit_forms(verb_fields[5], $1, "VBZ")
+	}
+}
+
+
+# Second field is a lone class letter: it is wrapped in BEGIN/END sentinels once and the rules below test that copy.
+{ sentinel = "BEGIN" $2 "END" }
+sentinel ~ /BEGINCEND/ { print $1 "\t" $1 "\tCC" }
+sentinel ~ /BEGINPEND/ { print $1 "\t" $1 "\t" map[$2] }
+sentinel ~ /BEGIN\!END/ { print $1 "\t" $1 "\t" map[$2] }
+sentinel ~ /BEGINIEND/ && !/ / { print $1 "\t" $1 "\t" map[$2] }	# the "D" rule stays disabled (original note: its as determiner!)
+sentinel ~ /BEGINrEND/ { print $1 "\t" $1 "\t" map[$2] }
+sentinel ~ /BEGINvEND/ { print $1 "\t" $1 "\t" map[$2] }	# an "AN" -> JJ rule (only without JJR/JJS entry) is disabled as well
+
+# Interjections: the record contains "!" but no space or apostrophe, and the
+# headword itself is free of "!". Prints the map["!"] tag, then removes
+# every "!" from the record so that later rules no longer see it.
+/!/ && !/[ ']/ && $1 !~ /!/ {
+	print $1 "\t" $1 "\t" map["!"]
+	gsub(/!/, "")
+}
+
+
+# Class strings containing the adverb code "v", tested at the end of records
+# that hold no space or apostrophe. Each record ends in at most one of these
+# class strings, and all of them start their output with the map["v"] tag.
+#   vA, Av -> v A     vAN -> v A N     vP -> v P     vN -> v N     v -> v
+#   vAtV   -> v (its class string is then rewritten, see the last step)
+!/[ ']/ && (/\t(vA|Av)$/ || /\tvAN$/ || /\tvAtV$/ || /\tv$/ || /\tvP$/ || /\tvN$/) {
+	print $1 "\t" $1 "\t" map["v"]
+
+	# Tags for the remaining class letters, in the order A, N, P.
+	if (/\t(vA|Av)$/ || /\tvAN$/)
+		print $1 "\t" $1 "\t" map["A"]
+	if (/\tvAN$/ || /\tvN$/)
+		print $1 "\t" $1 "\t" map["N"]
+	if (/\tvP$/)
+		print $1 "\t" $1 "\t" map["P"]
+
+	# "vAtV" gets no further tags here; its class string is rewritten to "AVt"
+	# so that the rule for "AV"/"AVt"/"AVti" further down sees the record.
+	if (/\tvAtV$/)
+		gsub(/\tvAtV/, "\tAVt")
+}
+
+# Adverb class strings beginning "|v" or "vP", in records without space or
+# apostrophe: RBR when the word is recorded in JJR[], RBS when recorded in JJS[].
+/\t(\|v|vP)/ && !/[ ']/ {
+	if (JJR[$1] != "") print $1 "\t" $1 "\tRBR"
+	if (JJS[$1] != "") print $1 "\t" $1 "\tRBS"
+	# The plain map["v"] tag is for words found in neither array, so both are tested.
+	if (JJR[$1] == "" && JJS[$1] == "") print $1 "\t" $1 "\t" map["v"]
+}
+
+# Reorder the class strings "vPC" -> "PCv" and "vC" -> "Cv" for the two rules
+# that follow; the replacement text carries no trailing "/" (it is not a regex).
+{ gsub(/\tvPC/, "\tPCv"); gsub(/\tvC/, "\tCv") }
+
+# Class strings "PCv" and "Cv" (this includes records rewritten from "vPC" and
+# "vC" by the rule just above): one output line per class letter, via map[].
+/\tPCv/ { n_cls = split("P C v", cls, " ")
+	for (c = 1; c <= n_cls; c++) print $1 "\t" $1 "\t" map[cls[c]] }
+/\tCv/ { n_cls = split("C v", cls, " ")
+	for (c = 1; c <= n_cls; c++) print $1 "\t" $1 "\t" map[cls[c]] }
+
+# "ANtV": JJ/RB unless recorded in JJR[]/JJS[]; NN/NNP unless recorded in nns[] (words in "ics" excepted); then map["t"], map["V"].
+/\tANtV/ {
+	if (JJR[$1] == "" && JJS[$1] == "")
+		print $1 "\t" $1 "\t" (($1"_END" !~ /ly_END/ || $1"_END" ~ /early_END/) ? "JJ" : "RB")
+	if (nns[$1] == "" || $1"_END" ~ /ics_END/)
+		print $1 "\t" $1 "\t" ($1 !~ /[A-Z]/ ? "NN" : "NNP")
+	print $1 "\t" $1 "\t" map["t"]; print $1 "\t" $1 "\t" map["V"]
+}
+# Class string "vNA" (record without apostrophe): emit the map["v"] tag, then
+# shorten the class string to "NA" so that the [AN][AN] rule right below picks the record up.
+$0 ~ /\tvNA$/ && $0 !~ /'/ {
+	print $1 "\t" $1 "\t" map["v"]; gsub(/\tvNA/, "\tNA") }
+
+# Two class letters from {A, N} after the tab: JJ unless recorded in JJR[]/JJS[] or ending in "ism"; then NN, or NNP with an upper-case letter.
+/\t[AN][AN]/ && !/'/ {
+	if (!(JJR[$1] != "" || JJS[$1] != "" || $1"_END" ~ /ism_END/)) print $1 "\t" $1 "\tJJ"
+	print $1 "\t" $1 "\t" ($1 ~ /[A-Z]/ ? "NNP" : "NN") }
+# Bare noun classes. "N" and "|N" at the end of the record (the "|N" form is
+# skipped when the headword consists of digits and dots) print NN, or NNP when
+# the headword has an upper-case letter -- unless the word was already recorded
+# as a plural in nns[]; words ending in "ics" are exempt from that check.
+(/\tN$/ || (/\t\|N$/ && !/^[0-9\.]+\t/)) && !/[ ']/ && (nns[$1] == "" || $1"_END" ~ /ics_END/) {
+	print $1 "\t" $1 "\t" ($1 ~ /[A-Z]/ ? "NNP" : "NN")
+}
+
+# Two class letters drawn from "N" and "!" after the tab (record without space
+# or apostrophe): the same noun output under the same nns[] check, followed by
+# the map["!"] tag, which is printed unconditionally.
+/\t[N!][N!]/ && !/[ ']/ {
+	if (nns[$1] == "" || $1"_END" ~ /ics_END/)
+		print $1 "\t" $1 "\t" ($1 ~ /[A-Z]/ ? "NNP" : "NN")
+	print $1 "\t" $1 "\t" map["!"]
+}
+	
+# Adjective classes "A" and "|NA" at the end of the record (no space or
+# apostrophe; "|NA" also requires that the headword is not just digits and
+# dots). Words already recorded in JJR[] or JJS[] are skipped; the rest print
+# as JJ, or as RB when they end in "ly" (words ending in "early" stay JJ).
+(/\tA$/ || (/\t\|NA$/ && !/^[0-9\.]+\t/)) && !/[ ']/ {
+	if (JJR[$1] == "" && JJS[$1] == "") {
+		adj_tag = ($1"_END" !~ /ly_END/ || $1"_END" ~ /early_END/) ? "JJ" : "RB"
+		print $1 "\t" $1 "\t" adj_tag
+	}
+}
+
+# Plural classes, for words not yet recorded in nns[]: "pN" prints NNS (NNPS if the word
+# contains an upper-case letter); a bare "p" prints only the upper-case case, as NNPS.
+/\tpN$/ && !/[ ']/ && nns[$1] == "" { print $1 "\t" $1 "\t" ($1 ~ /[A-Z]/ ? "NNPS" : "NNS") }
+/\tp$/ && !/[ ']/ && nns[$1] == "" && $1 ~ /[A-Z]/ { print $1 "\t" $1 "\tNNPS" }
+# Class string "DA" at the end of a record without space or apostrophe: only
+# the map["D"] tag is printed. The adjective line stays disabled:
+#	if (JJR[$1]=="" && JJS[$1]=="") print $1 "\t" $1 "\t"map["A"]
+$0 ~ /\tDA$/ && $0 !~ /[ ']/ {
+	print $1 "\t" $1 "\t" map["D"] }
+# Adjective reading of the class strings "Av", "AV", "AVt" and "AVti" (the last
+# one covers only two words: articulate, foliate). Only the adjective line is
+# printed here: JJ, or RB for words ending in "ly" (but JJ again for "early"),
+# and nothing for a word already recorded in JJR[] or JJS[].
+# The adverb (map["v"]) and verb (map["V"]) lines are not printed by this
+# rule; for "V" the original note reads: "this is wrong, actually
+# V = VBG & VBN".
+(/\tAv$/ || /\t(AV|AVti|AVt)$/) && !/[ ']/ && JJR[$1] == "" && JJS[$1] == "" {
+	is_adjective = ($1"_END" !~ /ly_END/ || $1"_END" ~ /early_END/)
+	print $1 "\t" $1 "\t" (is_adjective ? "JJ" : "RB")
+}
+
+
+# Class strings "|A" and "|Av" at the end of a record without space or
+# apostrophe. A word that is not already recorded in JJR[] or JJS[] is
+# printed once as an adjective: tag JJ, or RB when the word ends in "ly" --
+# except that words ending in "early" keep JJ.
+(/\t\|A$/ || /\t\|Av$/) && !/[ ']/ && JJR[$1] == "" && JJS[$1] == "" {
+	if ($1"_END" ~ /ly_END/ && $1"_END" !~ /early_END/)
+		print $1 "\t" $1 "\tRB"
+	else
+		print $1 "\t" $1 "\tJJ"
+}
+
+# Class string "AtNV" at the end of a record without space or apostrophe: JJ/RB unless recorded
+# in JJR[]/JJS[], NN/NNP unless recorded in nns[] (words in "ics" excepted), then map["t"] and map["V"].
+/\tAtNV$/ && !/[ ']/ {
+	if (JJR[$1] == "" && JJS[$1] == "")
+		print $1 "\t" $1 "\t" (($1"_END" !~ /ly_END/ || $1"_END" ~ /early_END/) ? "JJ" : "RB")
+	if (nns[$1] == "" || $1"_END" ~ /ics_END/)
+		print $1 "\t" $1 "\t" ($1 !~ /[A-Z]/ ? "NN" : "NNP")
+	print $1 "\t" $1 "\t" map["t"]; print $1 "\t" $1 "\t" map["V"]
+}
+
+# A record that ends in a tab followed by "!" and holds no space or apostrophe
+# gets the map["!"] tag.
+$0 ~ /\t\!$/ && $0 !~ /[ ']/ { print $1 "\t" $1 "\t" map["!"] }