32 lines
900 B
Python
32 lines
900 B
Python
import os
|
|
|
|
|
|
def write_anno(output_folder, documents, keyphrases):
|
|
# create output directory if not exists
|
|
if not os.path.exists(output_folder):
|
|
os.makedirs(output_folder)
|
|
|
|
for doc_id, doc_string in documents.items():
|
|
|
|
i = 0
|
|
output_file = open("%s/%s.%s" % (output_folder, doc_id, "ann"), "w")
|
|
|
|
for kp in keyphrases[doc_id]:
|
|
kp_string = ' '.join(kp)
|
|
|
|
for start_index in list(find_all(doc_string, kp_string)):
|
|
end_index = start_index + len(kp_string)
|
|
output_file.write("T%s\t%s %s %s\t%s\n" %
|
|
(i, "NO_TYPE", start_index, end_index, kp_string))
|
|
|
|
output_file.close()
|
|
|
|
|
|
def find_all(target_string, substring):
|
|
start = 0
|
|
while True:
|
|
start = target_string.find(substring, start)
|
|
if start == -1: return
|
|
yield start
|
|
start += 1
|