import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the four booking tables in one pass:
#   wc   - Western_Europe_City.csv
#   wr   - Western_Europe_Resort.csv
#   base - city.csv
#   bas  - resort.csv
wc, wr, base, bas = (
    pd.read_csv(csv_name)
    for csv_name in (
        "Western_Europe_City.csv",
        "Western_Europe_Resort.csv",
        "city.csv",
        "resort.csv",
    )
)

# European bookings within the resort table, and the share of them that the
# Western-Europe resort table (`wr`) accounts for.
#
# The mask has to be built from `bas` itself. A mask built from `base` (the
# city table) describes city-hotel rows, so applying it to `bas` does not
# filter resort bookings by their own continent.
# NOTE(review): assumes resort.csv carries the same `continent` column as
# city.csv - confirm against the file.
bas1 = bas.loc[bas["continent"] == "Europe"]
print(len(bas1))
print(len(wr))
# Percentage of European resort bookings found in the Western-Europe table.
len(wr)/len(bas1)*100

29348
4049

13.796510835491347

# Narrow the city table to European bookings, then to Portugal alone.
europe_rows = base["continent"] == "Europe"
base1 = base.loc[europe_rows]
portugal_rows = base1['country_decoded'] == 'Portugal'
base2 = base1.loc[portugal_rows]

# Portugal's percentage of the European city bookings.
print(len(base2) / len(base1) * 100)
# Percentage of the European city bookings covered by the Western-Europe table.
len(wc) / len(base1) * 100

28.608505997818973

40.95092693565976

# Market-share pie charts (slices are ordered and plotted counter-clockwise).
#
# The percentages are derived from the tables loaded above instead of being
# typed in by hand, so the charts cannot drift away from the data. Each value
# is rounded to two decimals and "Else" is whatever remains of 100%.
labels = 'Else', 'Western_Europe'
resort_west_pct = round(len(wr) / len(bas1) * 100, 2)
fracs = [round(100 - resort_west_pct, 2), resort_west_pct]

labels1 = 'Else', 'Portugal', 'Western_Europe'
city_portugal_pct = round(len(base2) / len(base1) * 100, 2)
city_west_pct = round(len(wc) / len(base1) * 100, 2)
fracs1 = [
    round(100 - city_portugal_pct - city_west_pct, 2),
    city_portugal_pct,
    city_west_pct,
]

# Make figure and axes: resort chart on the left, city chart on the right.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))

# Resort hotels: pull the Western-Europe slice (second entry) out of the pie.
axs[0].pie(fracs, labels=labels, autopct='%1.1f%%', shadow=True,
           explode=(0, 0.1))
axs[0].set_title("Resort_Hotel_Market_Share")

# City hotels: pull the Western-Europe slice (third entry) out of the pie.
axs[1].pie(fracs1, labels=labels1, autopct='%1.1f%%', shadow=True,
           explode=(0, 0, 0.1))
axs[1].set_title("City_Hotel_Market_Share")


plt.show()

Resort hotel의 경우 서유럽권의 비율이 상대적으로 낮아서 볼 필요성이 적다.
그러나 City hotel의 경우, 파이가 가장 큰 유럽 내에서 서유럽권이 약 41%의 점유율을 차지하고 있어 오히려 포르투갈보다 점유율이 더 크다.

# Preview the first five rows, restricted to column positions 9 through 19.
wc.iloc[:5, 9:20]

# Grouped bar chart: percentage of bookings per distribution channel, for the
# Western-Europe city table (`wc`) against the Europe-wide city table (`base1`).
channel_col = 'distribution_channel'
west_E = (wc.groupby([channel_col])[channel_col].count() / len(wc) * 100).round(2)
E = (base1.groupby([channel_col])[channel_col].count() / len(base1) * 100).round(2)

# The x-axis is the set of channels present in `wc`. The Europe-wide series is
# aligned to those channels by label rather than by position, so every pair of
# bars compares the same channel even when `base1` contains extra channels;
# a channel missing from `base1` is drawn as 0.
labels = west_E.index
E = E.reindex(labels, fill_value=0)

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

fig, ax = plt.subplots(figsize=(6, 5))
rects1 = ax.bar(x - width/2, west_E, width, label='West Europe')
rects2 = ax.bar(x + width/2, E, width, label='Europe')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('%')
ax.set_title('Percentage by distribution channel')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

def autolabel(rects, ax=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Args:
        rects: Iterable of bar artists (for example the container returned
            by ``Axes.bar``). Each item must provide ``get_height()``,
            ``get_x()`` and ``get_width()``.
        ax: Axes to draw the labels on. When omitted, each bar is labelled
            on the axes it belongs to (``rect.axes``), so the function does
            not depend on a module-level ``ax`` variable.
    """
    for rect in rects:
        target = rect.axes if ax is None else ax
        height = rect.get_height()
        target.annotate('{}'.format(height),
                        # Anchor at the horizontal centre of the bar's top edge.
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')


# Write the height above every bar of both groups, then tidy and display.
for bar_group in (rects1, rects2):
    autolabel(bar_group)

fig.tight_layout()

plt.show()

TA/TO, 즉 여행사를 통해서 호텔을 예약하는 비율이 훨씬 높은 것을 확인할 수 있었다.
퍼센트 포인트만 본다면 대략 7%p 정도밖에 차이가 나지 않지만, 역으로 생각해 보면 여행사를 통하지 않고 예약하는 경우는 17% => 10.5%로, 40% 가량 적은 것을 확인할 수 있다.

# Grouped bar chart: percentage of bookings per market segment, for the
# Western-Europe city table (`wc`) against the Europe-wide city table (`base1`).
segment_col = 'market_segment'
west_E = (wc.groupby([segment_col])[segment_col].count() / len(wc) * 100).round(2)
E = (base1.groupby([segment_col])[segment_col].count() / len(base1) * 100).round(2)

# The x-axis is the set of segments present in `wc`. The Europe-wide series is
# aligned to those segments by label rather than by position, so every pair of
# bars compares the same segment even when the two tables do not contain
# exactly the same segments; a segment missing from `base1` is drawn as 0.
labels = west_E.index
E = E.reindex(labels, fill_value=0)

x = np.arange(len(labels))  # the label locations
width = 0.25  # the width of the bars

fig, ax = plt.subplots(figsize=(13, 7))
rects1 = ax.bar(x - width/2, west_E, width, label='West Europe')
rects2 = ax.bar(x + width/2, E, width, label='Europe')

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('%')
ax.set_title('Percentage by Market Segment')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects, ax=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Args:
        rects: Iterable of bar artists (for example the container returned
            by ``Axes.bar``). Each item must provide ``get_height()``,
            ``get_x()`` and ``get_width()``.
        ax: Axes to draw the labels on. When omitted, each bar is labelled
            on the axes it belongs to (``rect.axes``), so the function does
            not depend on a module-level ``ax`` variable.
    """
    for rect in rects:
        target = rect.axes if ax is None else ax
        height = rect.get_height()
        target.annotate('{}'.format(height),
                        # Anchor at the horizontal centre of the bar's top edge.
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')


# Write the height above every bar of both groups, then tidy and display.
for bar_group in (rects1, rects2):
    autolabel(bar_group)

fig.tight_layout()

plt.show()

서유럽권은 유럽 전체에 비해서 Online TA의 비중이 상대적으로 높은 것을 확인할 수 있었다.
이에 따라서, 마케팅은 여행사에 집중하되 그중에서도 Online 여행사를 중심으로 하면 더욱 좋을 것이다.

그렇다면 어떠한 여행사?

# Percentage of Western-Europe city bookings handled by each agent,
# largest share first.
city_bookings_per_agent = wc.groupby(['agent'])['hotel'].count()
agent = (city_bookings_per_agent / len(wc) * 100).round(2).to_frame()
agent = agent.sort_values(by='hotel', ascending=False)
agent.head(10)

# The same table for Western-Europe resort bookings; `agent` is rebound to it.
resort_bookings_per_agent = wr.groupby(['agent'])['hotel'].count()
agent = (resort_bookings_per_agent / len(wr) * 100).round(2).to_frame()
agent = agent.sort_values(by='hotel', ascending=False)
agent.head(10)

서유럽권 City Hotel 기준으로 보았을 때, agent 9번의 점유율이 약 63%로 독과점 수준으로 높다는 것을 확인할 수 있었다.

# For each of the four agent ids below, print which market segments its
# Western-Europe city bookings fall into. The `agent` column is compared as
# text, so each id is converted with str() before matching.
top_city_agents = (9, 14, 7, 28)
for agent_id in top_city_agents:
    booked_by_agent = wc['agent'] == str(agent_id)
    wc1 = wc.loc[booked_by_agent]
    segment_counts = wc1['market_segment'].value_counts()
    print(agent_id, segment_counts)

9 Online TA    11795
Name: market_segment, dtype: int64
14 Direct    1163
Name: market_segment, dtype: int64
7 Online TA    1069
Name: market_segment, dtype: int64
28 Offline TA/TO    1010
Name: market_segment, dtype: int64

당연하게도 9번의 경우 Online 여행사였으며, 추가적인 인센티브 등을 제공하는 것이 좋을 듯하다.
7번의 경우에도 Online 여행사이므로, 이 여행사에도 추가적인 조치를 취하여 독과점이 되지 않도록 만드는 것이 중요할 것이다.

결론

City Hotel을 마케팅하기 위해서는 Online TA를 보아야 하고, 그 대부분은 agent 9번이 독과점하고 있으므로 좋은 관계를 유지하되, Online TA 중 2등인 7번도 적극적으로 지원하여 경쟁을 유도하고 그에 따른 이익을 챙길 필요가 있겠다.

	stays_in_week_nights	adults	meal	country	market_segment	distribution_channel
0	1	2	HB	ITA	Offline TA/TO	TA/TO
1	1	2	HB	ITA	Offline TA/TO	TA/TO
2	1	2	HB	ITA	Offline TA/TO	TA/TO
3	1	2	HB	ITA	Groups	TA/TO
4	1	2	HB	ITA	Offline TA/TO	TA/TO

	hotel
agent
9	62.82
14	6.19
7	5.69
28	5.38
Undefined	4.16
8	1.31
27	1.29
6	1.10
1	0.98
85	0.93

	hotel
agent
240	48.83
Undefined	13.02
250	7.43
6	3.58
241	3.28
251	2.45
298	2.40
242	2.20
464	1.68
314	1.36