-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate data
87 lines (74 loc) · 3.74 KB
/
generate data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 25 22:47:47 2017
@author: RUI54
"""
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
# Output columns: each list receives one entry per generated
# (month, offer, region) record and later becomes a DataFrame column.
region, month, offer = [], [], []
acc_num, revenue = [], []
discount, unknown = [], []

# Per-region size weight used to scale account counts.
# NOTE(review): each value looks like a population figure multiplied by a
# penetration share -- confirm the units with the author.
population = {
    'Longgang': 184*0.6,
    'Yantian': 21*0.3,
    'Baoan': 330*0.6,
    'Longhua': 140*0.4,
    'Futian': 116*0.65,
    'Luohu': 86*0.35,
    'Nanshan': 90*0.7,
}

# Share of each of the ten offers within a region (index 0 -> offer1).
loc_proporation = [0.1, 0.08, 0.2, 0.08, 0.07, 0.18, 0.15, 0.06, 0.03, 0.05]

# Per-offer monthly change trend for the first, second and third year.
monthly_change1 = [0, 0, -0.025, 0, 0, 0.02, 0.025, 0, 0, -0.01]
monthly_change2 = [0, 0, -0.02, 0, 0, 0.01, 0.03, 0, 0, -0.01]
monthly_change3 = [0, 0, -0.01, 0, 0.03, 0.02, 0.025, 0, -0.02, -0.05]

# How strongly a discount influences each region.
discount_region = {
    'Longgang': 0.7,
    'Yantian': 0.5,
    'Baoan': 0.8,
    'Longhua': 0.5,
    'Futian': 0.6,
    'Luohu': 0.65,
    'Nanshan': 0.8,
}

# List price of each offer (index 0 -> offer1).
offer_fee = [68, 78, 68, 38, 98, 88, 98, 78, 38, 158]
# Fill the column lists with one synthetic record per (month, offer, region):
# 36 months x 10 offers x 7 regions.  Every append happens in the innermost
# loop so that all seven column lists stay the same length.
region_names = ['Longgang', 'Yantian', 'Baoan', 'Longhua', 'Futian', 'Luohu', 'Nanshan']
# Trend tables indexed by year offset: 0 -> 2015, 1 -> 2016, 2 -> 2017.
yearly_trends = (monthly_change1, monthly_change2, monthly_change3)

for month_index in range(1, 37):
    # Months 1-12 fall in 2015, 13-24 in 2016 and 25-36 in 2017.
    year_offset, month_of_year = divmod(month_index - 1, 12)
    month_label = '%d/%02d/01' % (2015 + year_offset, month_of_year + 1)
    trend = yearly_trends[year_offset]

    for offer_index in range(1, 11):
        for region_name in region_names:
            month.append(month_label)
            offer.append('offer' + str(offer_index))
            region.append(region_name)

            # Draw the discount tier: 1-5 -> none, 6-8 -> 5%, 9-10 -> 10%.
            roll = random.randint(1, 10)
            if roll <= 5:
                discount_rate = 0
                discount.append('null')
            elif roll < 9:
                discount_rate = 0.05
                discount.append('5%')
            else:
                discount_rate = 0.10
                discount.append('10%')

            # Unexplained multiplicative perturbation of up to +/-3%.
            unexplained = random.uniform(-0.03, 0.03)
            unknown.append(unexplained)

            # Number of accounts for this region / offer / month.
            # The random draws below are made in a fixed order (share, discount
            # lift, noise) so a seeded run stays reproducible.
            share = loc_proporation[offer_index - 1] + random.uniform(0, 0.03)
            discount_lift = random.uniform(0, discount_rate) * discount_region[region_name]
            monthly_rate = trend[offer_index - 1] + discount_lift + random.uniform(-0.01, 0.01)
            account_number = (10000 * (population[region_name] * share)
                              * (1 + (month_index - 1) * monthly_rate)
                              * (1 + unexplained))

            # A strongly negative trend can push the growth factor below zero;
            # replace any non-positive count with a small random one.
            if not account_number > 0:
                account_number = random.uniform(500, 2000)
            acc_num.append(account_number)

            # Revenue = accounts x (discounted fee plus a -10..10 price jitter).
            fee = offer_fee[offer_index - 1] * (1 - discount_rate * discount_region[region_name])
            revenue.append(account_number * (fee + random.randint(-10, 10)))
# Assemble the generated column lists into a single table.
dic_data = {
    'Month': month,
    'Product': offer,
    'Region': region,
    'Account number': acc_num,
    'Revenue': revenue,
    'Discount': discount,
    'Unknown factor': unknown,
}
df = pd.DataFrame(dic_data)

# Account counts and revenue are produced as floats; store them as integers.
for column in ('Account number', 'Revenue'):
    df[column] = df[column].astype('int')

# Write the table to a CSV file (relative path, pandas' default options).
df.to_csv('generate_new_data.csv')