12345678910111213141516171819202122232425262728293031323334353637383940 |
- from datetime import datetime
- import pandas as pd
def convert_date(date_str):
    """Parse a ``<date>_<time>`` stamp into a :class:`datetime.datetime`.

    The text is split at its LAST underscore. In the leading (date) part
    every ``_`` becomes ``-``; in the trailing (time) part every ``:``
    becomes ``.``. The result is parsed as ``%Y-%m-%d %H.%M.%S.%f``, so a
    value such as ``2024_08_28_23:24:01:123`` is accepted.

    Args:
        date_str: The stamp to parse. It is converted with ``str()`` first,
            so non-string values are handled uniformly.

    Returns:
        The parsed naive ``datetime``.

    Raises:
        ValueError: If the text contains no underscore, or if the
            normalised text does not match the expected format.
    """
    # Convert once so the split and the replacements all act on the same text.
    text = str(date_str)
    date_part, separator, time_part = text.rpartition("_")
    if not separator:
        # Without this guard the failure would surface later as an opaque
        # strptime mismatch on a mangled string.
        raise ValueError(
            f"timestamp {text!r} has no '_' between its date and time parts"
        )
    date = date_part.replace("_", "-")
    time = time_part.replace(":", ".")
    return datetime.strptime(f"{date} {time}", '%Y-%m-%d %H.%M.%S.%f')
# --- Load -------------------------------------------------------------------
# Column names are taken from the second line of the file (header=1).
df = pd.read_csv(r"d:/data/b2_240828_2324_Err 1.csv", header=1)

# --- Clean ------------------------------------------------------------------
# Discard rows without a TimeStamp, then keep only the first row for each
# distinct TimeStamp value.
df.dropna(subset='TimeStamp', inplace=True)
df.drop_duplicates(subset='TimeStamp', keep="first", inplace=True)

# Remember the columns as read, before any helper columns are added.
origin_columns = df.columns.tolist()

# --- Derive time columns ----------------------------------------------------
# TimeStamp1 holds the parsed datetime and is used for ordering; DateTime is
# the same instant rendered as text truncated to whole seconds.
df['TimeStamp1'] = df['TimeStamp'].apply(convert_date)
df.sort_values(by='TimeStamp1', inplace=True)
df['DateTime'] = df['TimeStamp1'].apply(
    lambda stamp: stamp.strftime("%Y-%m-%d %H:%M:%S")
)
print(df.shape)

# --- Split by number of rows per second -------------------------------------
rows_per_second = df['DateTime'].value_counts()
seconds_seen_once = rows_per_second[rows_per_second == 1].index
seconds_seen_repeatedly = rows_per_second[rows_per_second > 1].index

single_row_mask = df['DateTime'].isin(seconds_seen_once)
multi_row_mask = df['DateTime'].isin(seconds_seen_repeatedly)
df1 = df[single_row_mask]
df2 = df[multi_row_mask]
print(df1.shape)
print(df2.shape)

# --- Export -----------------------------------------------------------------
# DateTime goes first, followed by the columns as originally read; the
# TimeStamp1 helper column is not written.
origin_columns = ['DateTime', *origin_columns]
csv_options = dict(
    encoding='utf-8',
    index=False,
    columns=origin_columns,
    date_format="%Y-%m-%d %H:%M:%S.%f",
)
df1.to_csv("1秒数据.csv", **csv_options)
df2.to_csv("毫秒数据.csv", **csv_options)
|