# error_ms_data.py (1.4 KB)

  1. from datetime import datetime
  2. import pandas as pd
  3. def convert_date(date_str):
  4. cut_index = str(date_str).rfind("_")
  5. date = date_str[0:cut_index].replace("_", "-")
  6. time = date_str[cut_index + 1:].replace(":", ".")
  7. return datetime.strptime(f"{date} {time}", '%Y-%m-%d %H.%M.%S.%f')
  8. df = pd.read_csv(r"d:/data/b2_240828_2324_Err 1.csv", header=1)
  9. df.dropna(subset='TimeStamp', inplace=True)
  10. df.drop_duplicates(subset='TimeStamp', keep="first", inplace=True)
  11. origin_columns = list(df.columns)
  12. df['TimeStamp1'] = df['TimeStamp'].apply(convert_date)
  13. df.sort_values(by='TimeStamp1', inplace=True)
  14. # df['DateTime'] = pd.to_datetime(df['TimeStamp'], format="%Y-%m-%d %H:%M:%S")
  15. df['DateTime'] = df['TimeStamp1'].apply(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"))
  16. print(df.shape)
  17. dateTime_count = df['DateTime'].value_counts()
  18. dateTime_count_1 = dateTime_count[dateTime_count == 1]
  19. dateTime_count_gt1 = dateTime_count[dateTime_count > 1]
  20. df1 = df[df['DateTime'].isin(dateTime_count_1.index.values)]
  21. df2 = df[df['DateTime'].isin(dateTime_count_gt1.index.values)]
  22. print(df1.shape)
  23. print(df2.shape)
  24. origin_columns.insert(0, 'DateTime')
  25. df1.to_csv("1秒数据.csv", encoding='utf-8', index=False, columns=origin_columns, date_format="%Y-%m-%d %H:%M:%S.%f")
  26. df2.to_csv("毫秒数据.csv", encoding='utf-8', index=False, columns=origin_columns, date_format="%Y-%m-%d %H:%M:%S.%f")