import os
# Show the working directory; the relative data path used below resolves against it.
os.getcwd()
Out[1]:
'D:\\AIS'
In [2]:
import pandas as pd
# Load the raw log without a header row. Splitting each line on ']' yields
# three fields (timestamp, source IP, NMEA sentence), as the preview below shows.
df = pd.read_csv(
    "202106180000.data",
    sep=']',
    header=None,
)
In [3]:
# Preview the raw split; the first two fields still start with '[' here.
df.head()
Out[3]:
01201234
[2021-06-18 00:00:00.004 | [155.155.9.24 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
[2021-06-18 00:00:00.004 | [155.155.9.25 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
[2021-06-18 00:00:00.004 | [155.155.6.2 | !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53 |
[2021-06-18 00:00:00.019 | [155.155.9.26 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
[2021-06-18 00:00:00.019 | [155.155.12.24 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
In [4]:
# Name the three fields produced by splitting each log line on ']'.
df.columns=['report_datetime', 'ip', 'nmea']
In [5]:
# Remove every '[' from all three string columns.
# regex=False forces a literal match: '[' on its own is not a valid regular
# expression, and the default value of `regex` changed between pandas releases,
# so relying on the default is version-dependent.
df = df.apply(lambda x: x.str.replace('[', '', regex=False))
In [6]:
# Confirm the '[' characters are gone.
df.head()
Out[6]:
report_datetimeipnmea01234
2021-06-18 00:00:00.004 | 155.155.9.24 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.004 | 155.155.9.25 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.004 | 155.155.6.2 | !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53 |
2021-06-18 00:00:00.019 | 155.155.9.26 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.019 | 155.155.12.24 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
In [7]:
# Trim leading and trailing whitespace from every value, one column at a time.
df = df.apply(lambda column: column.str.strip())
In [8]:
# Re-check the frame after stripping whitespace.
df.head()
Out[8]:
report_datetimeipnmea01234
2021-06-18 00:00:00.004 | 155.155.9.24 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.004 | 155.155.9.25 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.004 | 155.155.6.2 | !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53 |
2021-06-18 00:00:00.019 | 155.155.9.26 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.019 | 155.155.12.24 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
In [11]:
# Parse the timestamp strings into datetime64 values; the explicit format
# includes the fractional-seconds part of each stamp.
df['report_datetime'] = pd.to_datetime(
    df['report_datetime'],
    format='%Y-%m-%d %H:%M:%S.%f',
)
In [12]:
# Re-check the frame after converting report_datetime.
df.head()
Out[12]:
report_datetimeipnmea01234
2021-06-18 00:00:00.004 | 155.155.9.24 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.004 | 155.155.9.25 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.004 | 155.155.6.2 | !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53 |
2021-06-18 00:00:00.019 | 155.155.9.26 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
2021-06-18 00:00:00.019 | 155.155.12.24 | !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27 |
In [13]:
# Verify the column dtypes and non-null counts after the conversions above.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 798073 entries, 0 to 798072
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 report_datetime 798073 non-null datetime64[ns]
1 ip 798073 non-null object
2 nmea 798073 non-null object
dtypes: datetime64[ns](1), object(2)
memory usage: 18.3+ MB
In [15]:
# Write one NMEA sentence per line, exactly as stored in the column.
# A CSV writer is the wrong tool here: every sentence contains commas, so the
# default CSV quoting would wrap each line in double quotes and the file would
# no longer hold plain NMEA sentences.
with open("ais_out", "w", encoding="utf-8") as nmea_file:
    nmea_file.writelines(f"{sentence}\n" for sentence in df['nmea'])
In [18]:
from pyais.stream import FileReaderStream
# Input file for the pyais reader.
# NOTE(review): no visible cell writes 'ais_out2' (the export cells write
# 'ais_out' and 'ais_out3') — confirm where this file comes from.
filename='ais_out2'
In [22]:
"""
for msg in FileReaderStream(filename):
decoded_message=msg.decode()
ais_content=decoded_message.content
print(ais_content)
"""
Out[22]:
'\nfor msg in FileReaderStream(filename):\n decoded_message=msg.decode()\n ais_content=decoded_message.content\n print(ais_content)\n'
In [21]:
# NOTE(review): this cell ran with execution count 21, before any visible cell
# assigns ais_content — it depends on state left in the kernel by an earlier run.
type(ais_content)
Out[21]:
dict
In [26]:
import csv
# Write one NMEA sentence per line, exactly as stored in the column.
# The previous to_csv call used quoting=csv.QUOTE_NONE with escapechar='"'.
# With quoting disabled the csv writer must escape every delimiter inside a
# field, so it inserted a '"' before each comma and corrupted the sentences.
# Writing the lines directly avoids CSV quoting and escaping altogether.
with open("ais_out", "w", encoding="utf-8") as nmea_file:
    nmea_file.writelines(f"{sentence}\n" for sentence in df['nmea'])
In [27]:
# Keep a reference to the NMEA column.
# NOTE(review): `s` is not used by any later visible cell — possibly leftover.
s=df['nmea']
In [29]:
from pyais.stream import FileReaderStream
filename = 'ais_out2'

# Decode every sentence in the file. Both names are rebound on each pass, so
# once the loop finishes they refer to the last decoded message only.
for msg in FileReaderStream(filename):
    decoded_message = msg.decode()
    ais_content = decoded_message.content
In [30]:
# Content of the last message decoded by the loop above.
ais_content
Out[30]:
{'type': 21,
'repeat': 0,
'mmsi': '004403320',
'aid_type': <NavAid.FIXED: 3>,
'name': 'JANGJADO_RS',
'accuracy': True,
'lon': 126.34018166666667,
'lat': 34.793545,
'to_bow': 0,
'to_stern': 0,
'to_port': 0,
'to_starboard': 0,
'epfd': <EpfdType.GPS: 1>,
'second': 7,
'off_position': True,
'regional': 0,
'raim': True,
'virtual_aid': False,
'assigned': False,
'name_extension': ''}
In [31]:
# Look up the 'type' entry of the decoded message dict.
ais_content['type']
Out[31]:
21
In [32]:
# Print the decoded dict as plain text on a single line.
print(ais_content)
{'type': 21, 'repeat': 0, 'mmsi': '004403320', 'aid_type': <NavAid.FIXED: 3>, 'name': 'JANGJADO_RS', 'accuracy': True, 'lon': 126.34018166666667, 'lat': 34.793545, 'to_bow': 0, 'to_stern': 0, 'to_port': 0, 'to_starboard': 0, 'epfd': <EpfdType.GPS: 1>, 'second': 7, 'off_position': True, 'regional': 0, 'raim': True, 'virtual_aid': False, 'assigned': False, 'name_extension': ''}
In [43]:
# Display the NMEA column (pandas abbreviates the middle of a long Series).
df['nmea']
Out[43]:
0 !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
1 !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
2 !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
3 !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
4 !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
...
798068 !AIVDM,1,1,,A,16UD>t002:`w<cfCW0D=js<008Rd,0*2A
798069 !AIVDM,1,1,,A,16Sfe36P1=a09vVCoHKEmOwl20S<,0*5E
798070 !AIVDM,1,1,,A,E000Gh?9T4W0W?WQRPW7Sa0`44QlO@n`...
798071 !ANVDM,1,1,,B,16SWpGPP03a1rvfD7cdDmgwn2<2p,0*48
798072 !ANVDM,1,1,,B,16SWpGPP03a1rvfD7cdDmgwn2<2p,0*48
Name: nmea, Length: 798073, dtype: object
In [44]:
# .loc slices by label and includes the end label, so this shows rows 0 through 30.
df.loc[:30, 'nmea']
Out[44]:
0 !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
1 !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
2 !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
3 !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
4 !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
5 !AIVDM,1,1,,B,16SrE>PP00a2Rn6Cqfe;7auj080t,0*2E
6 !ANVDO,1,1,,A,E04<hN1U0W3U0R7ga9P00000000DQ:oJ...
7 $ANZDA,150000.00,17,06,2021,00,00*7B
8 $ANZDA,150000.00,17,06,2021,00,00*7B
9 $ANZDA,150000.00,17,06,2021,00,00*7B
10 !ANVDM,1,1,,B,16Ss8L0P00a2RUHCqecmtOwn25kd,0*7E
11 !ANVDM,1,1,,A,404=P6AvE`g00a2<rbCq9dw00D3W,0*78
12 !ANVDM,1,1,,B,16Ss8L0P00a2RUHCqecmtOwn25kd,0*7E
13 !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
14 !AIVDM,1,1,,B,16SrE>PP00a2Rn6Cqfe;7auj080t,0*2E
15 !AIVDM,1,1,,B,16SfpEUP0la1Dc`C`WmU7Owl2D1`,0*64
16 !AIVDM,1,1,,A,E000Gh?9T4W0W?WQRPW7Sa0`44QlO@nL...
17 !AIVDM,1,1,,A,16U1<<0022`wKPNCbB44wSqp0`0r,0*53
18 !AIVDM,1,1,,B,16S`@:0P03`whjrCtRsTvgwl28Rv,0*09
19 !AIVDM,1,1,,B,16SsifPP1E`vCT2Csb5l3Owl2HPw,0*7B
20 !AIVDM,1,1,,B,16S`Et0P02a19jnD<pVRL?wl2<0f,0*13
21 !AIVDM,1,1,,B,18I21a002?`w7KBCswr@:P<00HRv,0*6C
22 !AIVDM,1,1,,A,16T6?uPP0092Q6PCsMpFn?wn0HS1,0*2C
23 $ANZDA,150000.00,17,06,2021,00,00*7B
24 !ANVDM,1,1,,B,16Ss8L0P00a2RUHCqecmtOwn25kd,0*7E
25 !ANVDM,1,1,,A,404=P6AvE`g00a2<rbCq9dw00D3W,0*78
26 !ANVDM,1,1,,A,E04<hN1U0W3U0R7ga9P00000000DQ:oJ...
27 !ANVDO,1,1,,Y,E04=P5h6WST0R7P000000000000DQ@T6...
28 !ANVDO,1,1,,A,E04=P5h6WST0R7P000000000000DQ@T6...
29 !ANVDM,1,1,,A,16SaLO8P00a2H:8CooWh09qn25kd,0*04
30 !ANVDM,1,1,,A,16SaLO8P00a2H:8CooWh09qn25kd,0*04
Name: nmea, dtype: object
In [49]:
import csv
# Write the first 30 NMEA sentences (labels 0..29, .loc is end-inclusive),
# one per line and exactly as stored.
# The previous to_csv call used quoting=csv.QUOTE_NONE with escapechar='"'.
# That makes the csv writer insert a '"' before every comma inside a sentence,
# so the lines are written directly instead.
with open("ais_out3", "w", encoding="utf-8") as nmea_file:
    nmea_file.writelines(f"{sentence}\n" for sentence in df.loc[:29, 'nmea'])
In [52]:
filename = 'ais_out3'

# Decode each sentence of the 30-row sample and print its content dict.
# NOTE(review): print is assumed to sit inside the loop (one line per message);
# the original indentation was not preserved — confirm.
for msg in FileReaderStream(filename):
    decoded_message = msg.decode()
    ais_content = decoded_message.content
    print(ais_content)
In [81]:
# NOTE(review): displayed with execution count 81, before the cell (count 93)
# that builds df2 — this output comes from state left by an earlier run.
df2
Out[81]:
typerepeatmmsistatusturnspeedaccuracylonlatcourseheadingsecondmaneuverraimradio0
1 | 0 | 440144950 | 0 | -128 | 0.2 | False | 126.202653 | 35.043175 | 352.0 | 511 | 6 | 0 | True | 65896 |
In [93]:
# Collect decoded messages of AIS types 1, 2 and 3 into separate lists, then
# build one DataFrame per type.
filename = 'ais_out2'
lst1, lst2, lst3 = [], [], []

for msg in FileReaderStream(filename):
    decoded_message = msg.decode()
    ais_content = decoded_message.content
    # Read the 'type' entry directly instead of scanning every key/value pair.
    msg_type = ais_content.get('type')
    if msg_type == 1:
        lst1.append(ais_content)
    elif msg_type == 2:
        lst2.append(ais_content)
    elif msg_type == 3:
        lst3.append(ais_content)

# Build each frame once, after the loop. Rebuilding a DataFrame from the whole
# list on every append made the original quadratic in the number of messages.
# Each frame is now always defined, and is empty if no message of that type was seen.
df1 = pd.DataFrame(lst1)
df2 = pd.DataFrame(lst2)
df3 = pd.DataFrame(lst3)
In [94]:
# Show the type-3 records collected by the loop above.
df3
Out[94]:
typerepeatmmsistatusturnspeedaccuracylonlatcourseheadingsecondmaneuverraimradio01234567
3 | 0 | 457013000 | 0 | 0 | 0.3 | False | 125.014033 | 33.972267 | 56.7 | 200 | 0 | 0 | False | 0 |
3 | 0 | 457013000 | 0 | 0 | 0.3 | False | 125.014033 | 33.972267 | 56.7 | 200 | 0 | 0 | False | 0 |
3 | 0 | 351664000 | 0 | 0 | 13.8 | False | 124.898692 | 34.375642 | 296.7 | 298 | 5 | 0 | False | 26371 |
3 | 0 | 440403000 | 0 | 0 | 11.8 | False | 125.591663 | 33.925605 | 283.1 | 285 | 5 | 0 | False | 11809 |
3 | 0 | 351664000 | 0 | 0 | 13.8 | False | 124.898692 | 34.375642 | 296.7 | 298 | 5 | 0 | False | 26371 |
3 | 0 | 440403000 | 0 | 0 | 11.8 | False | 125.591663 | 33.925605 | 283.1 | 285 | 5 | 0 | False | 11809 |
3 | 0 | 356935000 | 0 | -13 | 13.8 | True | 124.696945 | 33.649793 | 196.2 | 197 | 1 | 0 | False | 6881 |
3 | 0 | 356935000 | 0 | -13 | 13.8 | True | 124.696945 | 33.649793 | 196.2 | 197 | 1 | 0 | False | 6881 |
In [89]:
# NOTE(review): `lst` is only assigned in the cell below; this ran (count 89)
# against state left in the kernel by an earlier run.
type(lst)
Out[89]:
list
In [ ]:
# Collect every decoded message whose 'type' is 1 and build a DataFrame from them.
filename = 'ais_out2'
lst = []

for msg in FileReaderStream(filename):
    decoded_message = msg.decode()
    ais_content = decoded_message.content
    # Read the 'type' entry directly instead of scanning every key/value pair.
    if ais_content.get('type') == 1:
        lst.append(ais_content)

# Build the frame once, after the loop. Rebuilding it on every append was
# quadratic in the number of messages.
# NOTE(review): the result is stored in df2 although it holds type-1 messages,
# and it overwrites any df2 built by another cell — confirm the name is intended.
df2 = pd.DataFrame(lst)
'빅데이터' 카테고리의 다른 글
NIFI 설치 및 실행 (0) | 2023.09.27 |
---|---|
offline 서버에 Rstudio 설치를 위한 리눅스 구성 (0) | 2023.09.27 |
postgis의 geometry와 geography (0) | 2023.09.27 |
해도 데이터의 postgres변환 (0) | 2023.09.27 |
Spyder 원격서버 접속 (0) | 2023.09.27 |