빅데이터

AIS NMEA 0183 포맷 데이터의 DataFrame 변환

도그사운드 2023. 9. 27. 16:33
import os
os.getcwd()
Out[1]:
'D:\\AIS'
In [2]:
import pandas as pd
df=pd.read_csv("202106180000.data", header=None, sep=']')
In [3]:
df.head()
Out[3]:
   0                         1               2
0  [2021-06-18 00:00:00.004  [155.155.9.24   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
1  [2021-06-18 00:00:00.004  [155.155.9.25   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
2  [2021-06-18 00:00:00.004  [155.155.6.2    !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
3  [2021-06-18 00:00:00.019  [155.155.9.26   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
4  [2021-06-18 00:00:00.019  [155.155.12.24  !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
In [4]:
df.columns=['report_datetime', 'ip', 'nmea']
In [5]:
# Remove the leading '[' left over from splitting each raw line on ']'.
# regex=False forces a literal match: '[' is a regex metacharacter, and the
# default value of `regex` differs between pandas versions.
df=df.apply(lambda x: x.str.replace('[', '', regex=False))
In [6]:
df.head()
Out[6]:
   report_datetime          ip             nmea
0  2021-06-18 00:00:00.004  155.155.9.24   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
1  2021-06-18 00:00:00.004  155.155.9.25   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
2  2021-06-18 00:00:00.004  155.155.6.2    !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
3  2021-06-18 00:00:00.019  155.155.9.26   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
4  2021-06-18 00:00:00.019  155.155.12.24  !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
In [7]:
df=df.apply(lambda x : x.str.strip())
In [8]:
df.head()
Out[8]:
   report_datetime          ip             nmea
0  2021-06-18 00:00:00.004  155.155.9.24   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
1  2021-06-18 00:00:00.004  155.155.9.25   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
2  2021-06-18 00:00:00.004  155.155.6.2    !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
3  2021-06-18 00:00:00.019  155.155.9.26   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
4  2021-06-18 00:00:00.019  155.155.12.24  !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
In [11]:
df['report_datetime']=pd.to_datetime(df['report_datetime'], format='%Y-%m-%d %H:%M:%S.%f')
In [12]:
df.head()
Out[12]:
   report_datetime          ip             nmea
0  2021-06-18 00:00:00.004  155.155.9.24   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
1  2021-06-18 00:00:00.004  155.155.9.25   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
2  2021-06-18 00:00:00.004  155.155.6.2    !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
3  2021-06-18 00:00:00.019  155.155.9.26   !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
4  2021-06-18 00:00:00.019  155.155.12.24  !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
In [13]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 798073 entries, 0 to 798072
Data columns (total 3 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   report_datetime  798073 non-null  datetime64[ns]
 1   ip               798073 non-null  object        
 2   nmea             798073 non-null  object        
dtypes: datetime64[ns](1), object(2)
memory usage: 18.3+ MB
In [15]:
df['nmea'].to_csv("ais_out", header=None, index=False)
In [18]:
from pyais.stream import FileReaderStream
filename='ais_out2'
In [22]:
"""
for msg in FileReaderStream(filename):
    decoded_message=msg.decode()
    ais_content=decoded_message.content
    print(ais_content)
"""
Out[22]:
'\nfor msg in FileReaderStream(filename):\n    decoded_message=msg.decode()\n    ais_content=decoded_message.content\n    print(ais_content)\n'
In [21]:
type(ais_content)
Out[21]:
dict
In [26]:
import csv
# Write one raw NMEA sentence per line, with no CSV quoting or escaping.
# to_csv(..., quoting=csv.QUOTE_NONE, escapechar='"') puts the escape
# character in front of every comma inside a sentence, which corrupts it.
# NOTE(review): the cells below read 'ais_out2', not 'ais_out' - confirm
# which filename is intended.
with open("ais_out", "w", encoding="utf-8") as fh:
    fh.writelines(f"{sentence}\n" for sentence in df['nmea'])
In [27]:
s=df['nmea']
In [29]:
from pyais.stream import FileReaderStream
filename='ais_out2'
for msg in FileReaderStream(filename):
    decoded_message=msg.decode()
    ais_content=decoded_message.content
In [30]:
ais_content
Out[30]:
{'type': 21,
 'repeat': 0,
 'mmsi': '004403320',
 'aid_type': <NavAid.FIXED: 3>,
 'name': 'JANGJADO_RS',
 'accuracy': True,
 'lon': 126.34018166666667,
 'lat': 34.793545,
 'to_bow': 0,
 'to_stern': 0,
 'to_port': 0,
 'to_starboard': 0,
 'epfd': <EpfdType.GPS: 1>,
 'second': 7,
 'off_position': True,
 'regional': 0,
 'raim': True,
 'virtual_aid': False,
 'assigned': False,
 'name_extension': ''}
In [31]:
ais_content['type']
Out[31]:
21
In [32]:
print(ais_content)
{'type': 21, 'repeat': 0, 'mmsi': '004403320', 'aid_type': <NavAid.FIXED: 3>, 'name': 'JANGJADO_RS', 'accuracy': True, 'lon': 126.34018166666667, 'lat': 34.793545, 'to_bow': 0, 'to_stern': 0, 'to_port': 0, 'to_starboard': 0, 'epfd': <EpfdType.GPS: 1>, 'second': 7, 'off_position': True, 'regional': 0, 'raim': True, 'virtual_aid': False, 'assigned': False, 'name_extension': ''}
In [43]:
df['nmea']
Out[43]:
0           !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
1           !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
2           !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
3           !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
4           !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
                                ...                        
798068      !AIVDM,1,1,,A,16UD>t002:`w<cfCW0D=js<008Rd,0*2A
798069      !AIVDM,1,1,,A,16Sfe36P1=a09vVCoHKEmOwl20S<,0*5E
798070    !AIVDM,1,1,,A,E000Gh?9T4W0W?WQRPW7Sa0`44QlO@n`...
798071      !ANVDM,1,1,,B,16SWpGPP03a1rvfD7cdDmgwn2<2p,0*48
798072      !ANVDM,1,1,,B,16SWpGPP03a1rvfD7cdDmgwn2<2p,0*48
Name: nmea, Length: 798073, dtype: object
In [44]:
df.loc[:30, 'nmea']
Out[44]:
0       !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
1       !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
2       !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
3       !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
4       !ANVDM,1,1,,A,16Sjn`PP0092dc`CqT2n>gwn0D0d,0*27
5       !AIVDM,1,1,,B,16SrE>PP00a2Rn6Cqfe;7auj080t,0*2E
6     !ANVDO,1,1,,A,E04<hN1U0W3U0R7ga9P00000000DQ:oJ...
7                  $ANZDA,150000.00,17,06,2021,00,00*7B
8                  $ANZDA,150000.00,17,06,2021,00,00*7B
9                  $ANZDA,150000.00,17,06,2021,00,00*7B
10      !ANVDM,1,1,,B,16Ss8L0P00a2RUHCqecmtOwn25kd,0*7E
11      !ANVDM,1,1,,A,404=P6AvE`g00a2<rbCq9dw00D3W,0*78
12      !ANVDM,1,1,,B,16Ss8L0P00a2RUHCqecmtOwn25kd,0*7E
13      !AIVDM,1,1,,B,16SWNQ0P0092IrTCrN0@0?wop8Rl,0*53
14      !AIVDM,1,1,,B,16SrE>PP00a2Rn6Cqfe;7auj080t,0*2E
15      !AIVDM,1,1,,B,16SfpEUP0la1Dc`C`WmU7Owl2D1`,0*64
16    !AIVDM,1,1,,A,E000Gh?9T4W0W?WQRPW7Sa0`44QlO@nL...
17      !AIVDM,1,1,,A,16U1<<0022`wKPNCbB44wSqp0`0r,0*53
18      !AIVDM,1,1,,B,16S`@:0P03`whjrCtRsTvgwl28Rv,0*09
19      !AIVDM,1,1,,B,16SsifPP1E`vCT2Csb5l3Owl2HPw,0*7B
20      !AIVDM,1,1,,B,16S`Et0P02a19jnD<pVRL?wl2<0f,0*13
21      !AIVDM,1,1,,B,18I21a002?`w7KBCswr@:P<00HRv,0*6C
22      !AIVDM,1,1,,A,16T6?uPP0092Q6PCsMpFn?wn0HS1,0*2C
23                 $ANZDA,150000.00,17,06,2021,00,00*7B
24      !ANVDM,1,1,,B,16Ss8L0P00a2RUHCqecmtOwn25kd,0*7E
25      !ANVDM,1,1,,A,404=P6AvE`g00a2<rbCq9dw00D3W,0*78
26    !ANVDM,1,1,,A,E04<hN1U0W3U0R7ga9P00000000DQ:oJ...
27    !ANVDO,1,1,,Y,E04=P5h6WST0R7P000000000000DQ@T6...
28    !ANVDO,1,1,,A,E04=P5h6WST0R7P000000000000DQ@T6...
29      !ANVDM,1,1,,A,16SaLO8P00a2H:8CooWh09qn25kd,0*04
30      !ANVDM,1,1,,A,16SaLO8P00a2H:8CooWh09qn25kd,0*04
Name: nmea, dtype: object
In [49]:
import csv
# Dump the first 30 sentences (labels 0..29; .loc slicing is inclusive) as
# raw lines. Plain line output is used because to_csv with
# quoting=csv.QUOTE_NONE and escapechar='"' puts the escape character in
# front of every comma inside a sentence, which corrupts it.
with open("ais_out3", "w", encoding="utf-8") as fh:
    fh.writelines(f"{sentence}\n" for sentence in df.loc[:29, 'nmea'])
In [52]:
filename='ais_out3'
for msg in FileReaderStream(filename):
    decoded_message=msg.decode()
    ais_content=decoded_message.content
    print(ais_content)
In [81]:
df2
Out[81]:
   type  repeat  mmsi       status  turn  speed  accuracy  lon         lat        course  heading  second  maneuver  raim  radio
0  1     0       440144950  0       -128  0.2    False     126.202653  35.043175  352.0   511      6       0         True  65896
In [93]:
filename='ais_out2'
# Collect the decoded content dicts of message types 1, 2 and 3 separately.
lst1, lst2, lst3=[], [], []
buckets = {1: lst1, 2: lst2, 3: lst3}
for msg in FileReaderStream(filename):
    decoded_message=msg.decode()
    ais_content=decoded_message.content
    # Look the type up directly instead of scanning every key/value pair.
    bucket = buckets.get(ais_content.get('type'))
    if bucket is not None:
        bucket.append(ais_content)
# Build each DataFrame once, after the loop. Rebuilding it on every append
# is quadratic in the number of messages, and a type that never occurred
# would leave its DataFrame undefined (or stale from an earlier cell).
df1=pd.DataFrame(lst1)
df2=pd.DataFrame(lst2)
df3=pd.DataFrame(lst3)
In [94]:
df3
Out[94]:
   type  repeat  mmsi       status  turn  speed  accuracy  lon         lat        course  heading  second  maneuver  raim   radio
0  3     0       457013000  0       0     0.3    False     125.014033  33.972267  56.7    200      0       0         False  0
1  3     0       457013000  0       0     0.3    False     125.014033  33.972267  56.7    200      0       0         False  0
2  3     0       351664000  0       0     13.8   False     124.898692  34.375642  296.7   298      5       0         False  26371
3  3     0       440403000  0       0     11.8   False     125.591663  33.925605  283.1   285      5       0         False  11809
4  3     0       351664000  0       0     13.8   False     124.898692  34.375642  296.7   298      5       0         False  26371
5  3     0       440403000  0       0     11.8   False     125.591663  33.925605  283.1   285      5       0         False  11809
6  3     0       356935000  0       -13   13.8   True      124.696945  33.649793  196.2   197      1       0         False  6881
7  3     0       356935000  0       -13   13.8   True      124.696945  33.649793  196.2   197      1       0         False  6881
In [89]:
type(lst)
Out[89]:
list
In [ ]:
filename='ais_out2'
# Collect the decoded content dicts of type-1 messages.
lst=[]
for msg in FileReaderStream(filename):
    decoded_message=msg.decode()
    ais_content=decoded_message.content
    # Direct key lookup; no need to iterate over every item of the dict.
    if ais_content.get('type')==1:
        lst.append(ais_content)
# Build the DataFrame once after the loop instead of on every append.
df2=pd.DataFrame(lst)

'빅데이터' 카테고리의 다른 글

NIFI 설치 및 실행  (0) 2023.09.27
offline 서버에 Rstudio 설치를 위한 리눅스 구성  (0) 2023.09.27
postgis의 geometry와 geography  (0) 2023.09.27
해도 데이터의 postgres변환  (0) 2023.09.27
Spyder 원격서버 접속  (0) 2023.09.27