import requests
import json
import sys
import os
import pandas  # only used by the commented-out process_csv_file below
import time

username = ""
password = ""

old_column_name = ["id", "Openness", "Openness_e", "Adventurousness", "Adventurousness_e",
                   "Artistic interests", "Artistic interests_e", "Emotionality", "Emotionality_e",
                   "Imagination", "Imagination_e", "Intellect", "Intellect_e",
                   "Authority-challenging", "Authority-challenging_e",
                   "Conscientiousness", "Conscientiousness_e",
                   "Achievement striving", "Achievement striving_e", "Cautiousness", "Cautiousness_e",
                   "Dutifulness", "Dutifulness_e", "Orderliness", "Orderliness_e",
                   "Self-discipline", "Self-discipline_e", "Self-efficacy", "Self-efficacy_e",
                   "Extraversion", "Extraversion_e", "Activity level", "Activity level_e",
                   "Assertiveness", "Assertiveness_e", "Cheerfulness", "Cheerfulness_e",
                   "Excitement-seeking", "Excitement-seeking_e", "Outgoing", "Outgoing_e",
                   "Gregariousness", "Gregariousness_e", "Agreeableness", "Agreeableness_e",
                   "Altruism", "Altruism_e", "Cooperation", "Cooperation_e",
                   "Modesty", "Modesty_e", "Uncompromising", "Uncompromising_e",
                   "Sympathy", "Sympathy_e", "Trust", "Trust_e",
                   "Emotional range", "Emotional range_e", "Fiery", "Fiery_e",
                   "Prone to worry", "Prone to worry_e", "Melancholy", "Melancholy_e",
                   "Immoderation", "Immoderation_e", "Self-consciousness", "Self-consciousness_e",
                   "Susceptible to stress", "Susceptible to stress_e",
                   "Challenge", "Challenge_e", "Closeness", "Closeness_e",
                   "Curiosity", "Curiosity_e", "Excitement", "Excitement_e",
                   "Harmony", "Harmony_e", "Ideal", "Ideal_e", "Liberty", "Liberty_e",
                   "Love", "Love_e", "Practicality", "Practicality_e",
                   "Self-expression", "Self-expression_e", "Stability", "Stability_e",
                   "Structure", "Structure_e", "Conservation", "Conservation_e",
                   "Openness to change", "Openness to change_e", "Hedonism", "Hedonism_e",
                   "Self-enhancement", "Self-enhancement_e",
                   "Self-transcendence", "Self-transcendence_e"]

# For every column in the old schema, add a matching raw-score column
# prefixed with "R_" directly after it.
column_name = ['id']
for column in old_column_name[1:]:
    column_name.append(column)
    column_name.append('R_' + column)
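# The resulting header order per trait is therefore:
#   <trait>, R_<trait>, <trait>_e, R_<trait>_e
# e.g. Openness, R_Openness, Openness_e, R_Openness_e, which lines up with
# the per-trait value order produced by parse_json below:
#   percentage, raw_score, sampling_error, raw_sampling_error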
# def pre_aggregate(dir_name, file_list):
#     """Aggregate the data for each person and output a txt file per person
#     to the directory 'Aggregated_files'
#
#     Args:
#     =====
#     dir_name: The name of the directory containing all the data to be aggregated
#     file_list: The list of all the files in the target directory
#
#     Returns:
#     ========
#     output_file_list: The list of filenames in the output directory 'Aggregated_files'
#     """
#     name_list = []
#     i = 0
#     out_dir_name = 'Aggregated_files'
#     if not os.path.exists(out_dir_name):
#         os.mkdir(out_dir_name)
#     for file_name in file_list:
#         name_list = []
#         with open(dir_name + '/' + file_name, "rb") as f:
#             for line in f:
#                 # skip the header line
#                 if i == 0:
#                     i += 1
#                     continue
#                 try:
#                     [cid, ordernum, fname, lname, content] = line.strip().split('\t')
#                 except:
#                     [cid, ordernum, fname, lname] = line.strip().split('\t')
#                     print line
#                 if '?' in lname:
#                     lname = lname.strip('?')
#                 if (fname, lname) not in name_list:
#                     fout = open(out_dir_name + '/' + '_'.join((fname, lname)) + '.txt', "w")
#                     fout.write(fname + '\t' + lname + '\n')
#                     fout.write(cid + '\t' + content + '\n\n')
#                     name_list.append((fname, lname))
#                 else:
#                     fout = open(out_dir_name + '/' + '_'.join((fname, lname)) + '.txt', "a")
#                     fout.write(cid + '\t' + content + '\n\n')
#                 fout.close()
#     output_file_list = ['_'.join((x[0], x[1])) + '.txt' for x in name_list]
#     return output_file_list


# def read_data(dir_name, file_list):
#     """Read all the data from a specific directory
#
#     Args:
#     =====
#     dir_name: The name of the directory containing all the aggregated data
#     file_list: The list of all the files in the target directory
#
#     Returns:
#     ========
#     text: A list of tuples containing the text, id, and type of each file
#     """
#     text = []
#     for file_name in file_list:
#         if file_name[-3:] == "txt":
#             content = process_txt_file(dir_name + "/" + file_name)
#             tmp = file_name[:-4]
#             text.append((content, tmp.split('_')[0], tmp.split('_')[1]))
#         # elif file_name[-3:] == "csv":
#         #     data = process_csv_file(dir_name + "/" + file_name)
#         #     if data is not None:
#         #         text.append((data, file_name[:-4], "csv"))
#     return text


# def process_txt_file(file_name):
#     """Clean the txt file
#
#     Args:
#     =====
#     file_name: The name of the file to process
#
#     Returns:
#     ========
#     content: pure text of the file
#     """
#     i = 0
#     content = ""
#     with open(file_name, "rb") as f:
#         for line in f:
#             # skip the header line
#             if i == 0:
#                 i += 1
#                 continue
#             content += " ".join(line.strip().split(" ")[1:])
#     return content


# def process_csv_file(file_name):
#     """Clean the csv file and skip useless csv files
#
#     Args:
#     =====
#     file_name: The name of the file to process
#
#     Returns:
#     ========
#     content: pure text of the file, or None if the file has no "tweet" column
#     """
#     file = pandas.read_csv(file_name)
#     if "tweet" not in file.columns:
#         return None
#     else:
#         content = " ".join(file["tweet"].tolist())
#         return content


def process_txt_file_2(dir_name, file_name):
    """Parse a tab-separated file into [id, text] records, concatenating
    consecutive lines that share the same id."""
    i = 0
    text = []
    with open(dir_name + "/" + file_name, "rb") as f:
        for line in f:
            # skip the header line
            if i == 0:
                i += 1
                continue
            # skip blank lines
            if line == '\r\n' or line == '\n':
                continue
            # same id as the previous record: append the text to it
            if len(text) > 0 and text[-1][0] == line.split('\t')[0]:
                text[-1][1] += line.split('\t')[1]
                continue
            text.append(line.split('\t'))
    return text


def parse_json(tree, id):
    """Get all the personality traits and their corresponding sampling errors
    from the profile returned by IBM Watson

    Args:
    =====
    tree: a nested dictionary which contains all the information returned by
          IBM Watson
    id: the id of the current user

    Returns:
    ========
    personalities: a list of all the personality scores, starting with the id
    """
    personalities = [id]
    if "tree" not in tree:
        # the request failed; pad the row with zeros so the schema still lines up
        personalities.extend(["0"] * (len(column_name) - 1))
    else:
        for name in tree["tree"]["children"]:
            for personality in name["children"][0]["children"]:
                personalities.extend([str(personality["percentage"]),
                                      str(personality["raw_score"]),
                                      str(personality["sampling_error"]),
                                      str(personality["raw_sampling_error"])])
                if name["name"] == "Big 5":
                    for sub_personality in personality["children"]:
                        personalities.extend([str(sub_personality["percentage"]),
                                              str(sub_personality["raw_score"]),
                                              str(sub_personality["sampling_error"]),
                                              str(sub_personality["raw_sampling_error"])])
    return personalities
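# A sketch of the v2 profile shape parse_json expects, inferred from the
# traversal above (field layout assumed, not verified against Watson docs):
#
# {"tree": {"children": [              # one node per model (Big 5, Needs, Values)
#     {"name": "Big 5",
#      "children": [{"children": [     # the traits of that model
#          {"percentage": ..., "raw_score": ...,
#           "sampling_error": ..., "raw_sampling_error": ...,
#           "children": [...]}         # Big 5 traits also carry facet nodes
#      ]}]},
#     ...]}}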
def analyze_text(text):
    """Use IBM Watson to analyze the text: post each (id, text) record to the
    Personality Insights v2 profile endpoint and parse the JSON response"""
    total_personalities = map(lambda info: parse_json(
        json.loads(requests.post("https://gateway.watsonplatform.net/personality-insights/api/v2/profile",
                                 auth=(username, password),
                                 headers={"content-type": "text/plain"},
                                 params={"include_raw": True},
                                 data=info[1]).text),
        info[0]), text)
    return total_personalities


def save_to_txt_file(output_filename, header, data):
    """Save the result to a tab-separated file

    Args:
    =====
    output_filename: the name of the output file
    header: schema
    data: the data you want to save
    """
    with open(output_filename, "wb") as f:
        f.write("\t".join(header) + "\n")
        final_data = "\n".join(map(lambda x: "\t".join(x), data))
        f.write(final_data)


def main():
    start = time.time()
    dir_name = sys.argv[1]
    file_list = os.listdir(dir_name)
    if not os.path.isdir('output/'):
        os.mkdir("output/")
    for file_name in file_list:
        data = process_txt_file_2(dir_name, file_name)
        columns = analyze_text(data)
        save_to_txt_file('output/' + file_name, column_name, columns)
    end = time.time()
    print end - start


if __name__ == '__main__':
    main()
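# Usage sketch (Python 2). Fill in `username` and `password` with IBM Watson
# Personality Insights credentials first, then run (assuming a hypothetical
# filename of personality.py):
#
#   python personality.py <input_dir>
#
# Every tab-separated file in <input_dir> is scored and written to
# output/<file_name>, one row per user id, using the columns in column_name.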