diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index b24d0107d51a0..2fc79c951e93a 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -135,8 +135,9 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo { PointerWidth = PointerAlign = 32; SizeType = TargetInfo::UnsignedInt; PtrDiffType = IntPtrType = TargetInfo::SignedInt; - resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024"); + resetDataLayout( + "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" + "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"); } void getTargetDefines(const LangOptions &Opts, @@ -151,8 +152,9 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo { SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; - resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-" - "v96:128-v192:256-v256:256-v512:512-v1024:1024"); + resetDataLayout( + "e-i64:64-v16:16-v24:32-v32:32-v48:64-" + "v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"); } void getTargetDefines(const LangOptions &Opts, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index b8a59bcdab391..7601ccad011de 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -599,19 +599,51 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, CodeGenOpts.PrepareForThinLTO)); } - PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; - PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; - PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; - PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; - - PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; - // Loop interleaving in the loop vectorizer has historically been set to be - // enabled when loop unrolling is enabled. 
- PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; - PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; - PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; - PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; - PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; + // FIXME: This code is a workaround for a number of problems with optimized + // SYCL code for the SPIR target. This change is trying to balance between + // too few and too many optimizations. The current approach is to disable as + // much as possible just to keep the compiler functional. Eventually we can + // consider allowing -On option to configure the optimization set for the FE + // device compiler as well, but before that we must fix all the functional and + // performance issues caused by LLVM transformations. + // E.g. LLVM optimizations make use of llvm intrinsics, instructions, data + // types, etc., which are not supported by the SPIR-V translator (current + // "back-end" for SYCL device compiler). + // NOTE: We use "normal" inliner (i.e. from O2/O3), but limit the rest of + // optimization pipeline. Inliner is a must for enabling size reduction + // optimizations. + if (LangOpts.SYCLIsDevice && TargetTriple.isSPIR()) { + PMBuilder.OptLevel = 1; + PMBuilder.SizeLevel = 2; + PMBuilder.SLPVectorize = false; + PMBuilder.LoopVectorize = false; + PMBuilder.DivergentTarget = true; + PMBuilder.DisableGVNLoadPRE = true; + PMBuilder.ForgetAllSCEVInLoopUnroll = true; + + PMBuilder.DisableUnrollLoops = true; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. 
+ PMBuilder.LoopsInterleaved = false; + PMBuilder.MergeFunctions = false; + PMBuilder.PrepareForThinLTO = false; + PMBuilder.PrepareForLTO = false; + PMBuilder.RerollLoops = false; + } else { + PMBuilder.OptLevel = CodeGenOpts.OptimizationLevel; + PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize; + PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; + PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; + + PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; + // Loop interleaving in the loop vectorizer has historically been set to be + // enabled when loop unrolling is enabled. + PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; + PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; + PMBuilder.PrepareForThinLTO = CodeGenOpts.PrepareForThinLTO; + PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; + PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; + } MPM.add(new TargetLibraryInfoWrapperPass(*TLII)); @@ -865,14 +897,15 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, std::unique_ptr ThinLinkOS, DwoOS; + // Clean-up SYCL device code if LLVM passes are disabled + if (LangOpts.SYCLIsDevice && CodeGenOpts.DisableLLVMPasses) + PerModulePasses.add(createDeadCodeEliminationPass()); + switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - if (LangOpts.SYCLIsDevice) { - PerModulePasses.add(createDeadCodeEliminationPass()); - } if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); @@ -1346,9 +1379,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( break; case Backend_EmitBC: - if (LangOpts.SYCLIsDevice) { - CodeGenPasses.add(createDeadCodeEliminationPass()); - } if (CodeGenOpts.PrepareForThinLTO && !CodeGenOpts.DisableLLVMPasses) { if (!CodeGenOpts.ThinLinkBitcodeFile.empty()) { ThinLinkOS = openOutputFile(CodeGenOpts.ThinLinkBitcodeFile); diff --git a/clang/test/CodeGen/target-data.c 
b/clang/test/CodeGen/target-data.c index e49f8453e360e..7eca3ebc33478 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -237,11 +237,11 @@ // RUN: %clang_cc1 -triple spir-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=SPIR -// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +// SPIR: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" // RUN: %clang_cc1 -triple spir64-unknown -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=SPIR64 -// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +// SPIR64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" // RUN: %clang_cc1 -triple bpfel -o - -emit-llvm %s | \ // RUN: FileCheck %s -check-prefix=BPFEL diff --git a/clang/test/CodeGenOpenCL/convergent.cl b/clang/test/CodeGenOpenCL/convergent.cl index 193d391ced207..01495a28598d9 100644 --- a/clang/test/CodeGenOpenCL/convergent.cl +++ b/clang/test/CodeGenOpenCL/convergent.cl @@ -121,7 +121,7 @@ void test_unroll() { // The new PM produces a slightly different IR for the loop from the legacy PM, // but the test still checks that the loop is not unrolled. 
-// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]] +// CHECK-LEGACY: br i1 %{{.+}}, label %[[for_cond_cleanup]], label %[[for_body]] // CHECK-NEW: br i1 %{{.+}}, label %[[for_body_crit_edge:.+]], label %[[for_cond_cleanup]] // CHECK-NEW: [[for_body_crit_edge]]: diff --git a/clang/test/CodeGenSYCL/address-space-swap.cpp b/clang/test/CodeGenSYCL/address-space-swap.cpp index 45ca6bb59d509..e361a63a8d3bf 100644 --- a/clang/test/CodeGenSYCL/address-space-swap.cpp +++ b/clang/test/CodeGenSYCL/address-space-swap.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -fsycl-device-only -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang -fsycl-device-only -S -Xclang -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s #include void test() { diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index 13cb265f0663f..fcff6aae4f763 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -fsycl-device-only %s -S -I %S/Inputs -emit-llvm -g -o - | FileCheck %s +// RUN: %clang -fsycl-device-only %s -S -emit-llvm -O0 -I %S/Inputs -g -o - | FileCheck %s // // Verify the SYCL kernel routine is marked artificial and has no source // correlation. 
diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp index 45dc71ab1aa52..f84d384fd09ba 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVLowerMemmove.cpp @@ -75,10 +75,12 @@ class SPIRVLowerMemmove : public ModulePass, report_fatal_error("llvm.memmove of non-constant length not supported", false); auto *Length = cast(I.getLength()); - if (isa(Src)) - // The source could be bit-cast from another type, - // need the original type for the allocation of the temporary variable - SrcTy = cast(Src)->getOperand(0)->getType(); + auto *S = Src; + // The source could be bit-cast or addrspacecast from another type, + // need the original type for the allocation of the temporary variable + while (isa(S) || isa(S)) + S = cast(S)->getOperand(0); + SrcTy = S->getType(); MaybeAlign Align = I.getSourceAlign(); auto Volatile = I.isVolatile(); Value *NumElements = nullptr; @@ -87,9 +89,13 @@ class SPIRVLowerMemmove : public ModulePass, NumElements = Builder.getInt32(SrcTy->getArrayNumElements()); ElementsCount = SrcTy->getArrayNumElements(); } - if (Mod->getDataLayout().getTypeSizeInBits(SrcTy->getPointerElementType()) * - ElementsCount != - Length->getZExtValue() * 8) + if (((ElementsCount > 1) && (Mod->getDataLayout().getTypeSizeInBits( + SrcTy->getPointerElementType()) * + ElementsCount != + Length->getZExtValue() * 8)) || + ((ElementsCount == 1) && + (Mod->getDataLayout().getTypeSizeInBits( + SrcTy->getPointerElementType()) < Length->getZExtValue() * 8))) report_fatal_error("Size of the memcpy should match the allocated memory", false); diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 8b355974259eb..298699af9e9f6 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -1310,6 +1310,10 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V, if (CallInst *CI = dyn_cast(V)) return mapValue(V, 
transCallInst(CI, BB)); + // FIXME: this is not valid translation of freeze instruction + if (FreezeInst *FI = dyn_cast(V)) + return mapValue(V, transValue(FI->getOperand(0), BB)); + llvm_unreachable("Not implemented"); return nullptr; } @@ -1825,6 +1829,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::dbg_label: + case Intrinsic::assume: return nullptr; default: if (SPIRVAllowUnknownIntrinsics) diff --git a/llvm-spirv/test/transcoding/llvm.memmove.ll b/llvm-spirv/test/transcoding/llvm.memmove.ll index dfb129e4d923c..867cd3a548547 100644 --- a/llvm-spirv/test/transcoding/llvm.memmove.ll +++ b/llvm-spirv/test/transcoding/llvm.memmove.ll @@ -17,18 +17,46 @@ ; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp3:[0-9]+]] [[mem]] ; CHECK-SPIRV: LifetimeStop [[tmp3]] [[size]] +; CHECK-SPIRV: GenericCastToPtr {{[0-9]+}} [[out:[0-9]+]] +; CHECK-SPIRV: Variable {{[0-9]+}} [[mem:[0-9]+]] 7 +; CHECK-SPIRV: Bitcast [[i8Ty:[0-9]+]] [[tmp0:[0-9]+]] [[mem]] +; CHECK-SPIRV: LifetimeStart [[tmp0]] [[size:[0-9]+]] +; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp1:[0-9]+]] [[mem]] +; CHECK-SPIRV: CopyMemorySized [[tmp1]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp2:[0-9]+]] [[mem]] +; CHECK-SPIRV: CopyMemorySized [[out]] [[tmp2]] {{[0-9]+}} +; CHECK-SPIRV: Bitcast [[i8Ty]] [[tmp3:[0-9]+]] [[mem]] +; CHECK-SPIRV: LifetimeStop [[tmp3]] [[size]] + ; CHECK-LLVM-NOT: llvm.memmove +; CHECK-LLVM-LABEL: @test_struct ; CHECK-LLVM: [[local:%[0-9]+]] = alloca %struct.SomeStruct ; CHECK-LLVM: [[tmp1:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type:i[0-9]+\*]] ; CHECK-LLVM: call void @llvm.lifetime.start.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp1]]) ; CHECK-LLVM: [[tmp2:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] -; CHECK-LLVM: call void @llvm.memcpy -; CHECK-LLVM: ([[type]] align 64 [[tmp2]], -; CHECK-LLVM: {{i[0-9]+}} [[size:[0-9]+]] +; CHECK-LLVM: call void 
@llvm.memcpy.p0i8.p1i8.i32 +; CHECK-LLVM-SAME: ([[type]] align 64 [[tmp2]], +; CHECK-LLVM-SAME: {{i[0-9]+}} [[size:[0-9]+]] ; CHECK-LLVM: [[tmp3:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] -; CHECK-LLVM: call void @llvm.memcpy -; CHECK-LLVM: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] +; CHECK-LLVM: call void @llvm.memcpy.p1i8.p0i8.i32 +; CHECK-LLVM-SAME: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] +; CHECK-LLVM: [[tmp4:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.lifetime.end.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp4]]) + +; CHECK-LLVM-LABEL: @copy_struct +; CHECK-LLVM: [[out:%[0-9]+]] = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(1)* +; CHECK-LLVM: [[local:%[0-9]+]] = alloca %struct.SomeStruct +; CHECK-LLVM: [[tmp1:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type:i[0-9]+\*]] +; CHECK-LLVM: call void @llvm.lifetime.start.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp1]]) +; CHECK-LLVM: [[tmp2:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.memcpy.p0i8.p1i8.i32 +; CHECK-LLVM-SAME: ([[type]] align 64 [[tmp2]], +; CHECK-LLVM-SAME: {{i[0-9]+}} [[size:[0-9]+]] +; CHECK-LLVM: [[tmp3:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] +; CHECK-LLVM: call void @llvm.memcpy.p1i8.p0i8.i32 +; CHECK-LLVM-SAME: align 64 [[out]] +; CHECK-LLVM-SAME: , [[type]] align 64 [[tmp3]], {{i[0-9]+}} [[size]] ; CHECK-LLVM: [[tmp4:%[0-9]+]] = bitcast %struct.SomeStruct* [[local]] to [[type]] ; CHECK-LLVM: call void @llvm.lifetime.end.p0i8({{i[0-9]+}} {{-?[0-9]+}}, [[type]] [[tmp4]]) @@ -45,6 +73,14 @@ define spir_kernel void @test_struct(%struct.SomeStruct addrspace(1)* nocapture ret void } +define spir_func void @copy_struct(%struct.SomeStruct addrspace(1)* nocapture readonly %in, %struct.SomeStruct addrspace(4)* nocapture %out) { + %1 = bitcast %struct.SomeStruct addrspace(1)* %in to i8 addrspace(1)* + %2 = bitcast %struct.SomeStruct 
addrspace(4)* %out to i8 addrspace(4)* + %3 = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(1)* + call void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* align 64 %3, i8 addrspace(1)* align 64 %1, i32 68, i1 false) + ret void +} + ; Function Attrs: nounwind declare void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #1